
clear all

set more off, permanently

* run this only once (it installs a packages that you will need):
* ssc install estout   // output the regression results to csv.
* ssc install reghdfe  // to do high-dimensional fixed effects regressions 
* ssc install egenmore  // this function corresponds with xtile (allowing for dividing data into medians, terciles, etc...)
* ssc install winsor2  // new package to help winsorize variables

* change to your input data folder address
global inny "C:\..."
* change to your output data folder address
global outty "C:\..."


/*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
SET UP DATA
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
/* ----------------------------------------------------------------*/
/* IMPORT CENSUS TRACT IDS-----------------------------------------*/
/* ----------------------------------------------------------------*/
/* 2010 Gazetteer file downloaded from U.S. Census Bureau 
https://www.census.gov/geographies/reference-files/time-series/geo/gazetteer-files.2010.html
Accessed on 4/5/2019 */
import delimited "$inny/Gaz_tracts_national.txt", stringcols(1,2) clear
rename geoid full_tract
*keep if usps == "TX"

* save the universe of census tract codes
save "$inny/tracts" , replace

/* 2010 Census Block Codes by State (Texas) from the Federal Communications Commission
https://www.fcc.gov/general/census-blocks-state
Accessed on 4/5/2019 */

import delim "$inny/Texas.csv", stringcols(1,2,3,4,5,6,7,8) clear
drop state county cnamelong tract tractname block tractcode
rename blockcode tract_block

* save the universe of TX census tract codes
save "$inny/tx_blocks" , replace

/* ----------------------------------------------------------------*/
/* GIS DATA--------------------------------------------------------*/
/* ----------------------------------------------------------------*/
use "$inny/gis_and_demo", clear
*prepare gis data for merge
* since data will be merged by county & track & block combinations, we will combine them here
g tract_block = STATEFP10 + COUNTYFP10 + TRACTCE10 + BLOCKCE10
destring tract_block, generate(tractblock) 
*About 40% of census 2010 blocks have zero population. So I think the data is correct: http://tumblr.mapsbynik.com/post/82791188950/nobody-lives-here-the-nearly-5-million-census
drop if tot_pop<=10
drop if Tdev_area==0
* dropping unnecessary or respecifying variables
drop share_Tdev_area_anyflFP
drop share_Tdev_area_fldepthQ1 share_Tdev_area_fldepthQ2 share_Tdev_area_fldepthQ3 share_Tdev_area_fldepthQ4 // we want these binary not continous
drop quart_flood_level // this isn't exactly what we want because we want the first dummy to include all of the zero flooding blocks and the quartiles to be formed from the non-zero stuff. So, I replace it per below
drop share_Tdev_area_fldepthFPQ1 share_Tdev_area_fldepthFPQ2 share_Tdev_area_fldepthFPQ3 share_Tdev_area_fldepthFPQ4  // we want these binary not continous
* Define flooding
* zero flood + 3 quartile dummies
su share_Tdev_area_anyfl, detail
g share_Tdev_area_anyfl_temp = share_Tdev_area_anyfl
replace share_Tdev_area_anyfl_temp=. if share_Tdev_area_anyfl==0
xtile share_Tdev_area_anyfl_tempT = share_Tdev_area_anyfl_temp, n(3)
replace share_Tdev_area_anyfl_tempT=0 if share_Tdev_area_anyfl==0
*  dummies
g i1_share_dev_anyfl = (share_Tdev_area_anyfl_tempT==0) 
g i2_share_dev_anyfl = (share_Tdev_area_anyfl_tempT==1) 
g i3_share_dev_anyfl = (share_Tdev_area_anyfl_tempT==2) 
g i4_share_dev_anyfl = (share_Tdev_area_anyfl_tempT==3) 
label variable i1_share_dev_anyfl "(1) 0 Flooded % of dev land area"
label variable i2_share_dev_anyfl "(2) q1 Flooded % of dev land area"
label variable i3_share_dev_anyfl "(3) q2 Flooded % of dev land area"
label variable i4_share_dev_anyfl "(4) q3 Flooded % of dev land area"
*  group
g group_share_dev_anyfl = "" 
replace group_share_dev_anyfl = "(1) 0" if i1_share_dev_anyfl==1
replace group_share_dev_anyfl = "(2) q1" if i2_share_dev_anyfl==1
replace group_share_dev_anyfl = "(3) q2" if i3_share_dev_anyfl==1
replace group_share_dev_anyfl = "(4) q3" if i4_share_dev_anyfl==1
label variable group_share_dev_anyfl "bins of Flooded % of dev land area"
* make a numbered version
g ngroup_share_dev_anyfl = .
replace ngroup_share_dev_anyfl = 1 if i1_share_dev_anyfl==1
replace ngroup_share_dev_anyfl = 2 if i2_share_dev_anyfl==1
replace ngroup_share_dev_anyfl = 3 if i3_share_dev_anyfl==1
replace ngroup_share_dev_anyfl = 4 if i4_share_dev_anyfl==1
label variable ngroup_share_dev_anyfl "bins of Flooded % of dev land area"
drop share_Tdev_area_anyfl_tempT share_Tdev_area_anyfl_temp
tab group_share_dev_anyfl
rename share_Tdev_area_anyfl share_dev_anyfl
* avg flood depth (not weighted)
g Avg_flood_depth_temp = Avg_flood_depth
replace Avg_flood_depth_temp=. if Avg_flood_depth==0
xtile Avg_flood_depth_tempQ = Avg_flood_depth_temp, n(3) // Cuts terciles after excluding the zeros
replace Avg_flood_depth_tempQ=0 if Avg_flood_depth==0    // then overwrites the missing with a new quartile labelled "0"
*  dummies
g i1_avg_flood_depth = (Avg_flood_depth_tempQ==0 )  // dummy that equals 1 if less than 5% of the block flooded
g i2_avg_flood_depth = (Avg_flood_depth_tempQ==1 ) 
g i3_avg_flood_depth = (Avg_flood_depth_tempQ==2 ) 
g i4_avg_flood_depth = (Avg_flood_depth_tempQ==3 ) 
label variable i1_avg_flood_depth "(1) 0 avg flood level"
label variable i2_avg_flood_depth "(2) Q1 avg flood level"
label variable i3_avg_flood_depth "(3) Q2 avg flood level"
label variable i4_avg_flood_depth "(4) Q3 avg flood level"
*  group
g group_avg_flood_depth = "" 
replace group_avg_flood_depth = "(1) 0" if i1_avg_flood_depth==1
replace group_avg_flood_depth = "(2) Q1" if i2_avg_flood_depth==1
replace group_avg_flood_depth = "(3) Q2" if i3_avg_flood_depth==1
replace group_avg_flood_depth = "(4) Q3" if i4_avg_flood_depth==1
label variable group_avg_flood_depth "bins of max flood level"
* make a numbered version
g ngroup_avg_flood_depth = .
replace ngroup_avg_flood_depth = 1 if i1_avg_flood_depth==1
replace ngroup_avg_flood_depth = 2 if i2_avg_flood_depth==1
replace ngroup_avg_flood_depth = 3 if i3_avg_flood_depth==1
replace ngroup_avg_flood_depth = 4 if i4_avg_flood_depth==1
label variable ngroup_avg_flood_depth "bins of avg flood level"
drop Avg_flood_depth_temp Avg_flood_depth_tempQ
tab group_avg_flood_depth
rename Avg_flood_depth avg_flood_depth 

* TREATEMENT VARIABLE the average depth of flooding across the developed part of the block (includes parts that didn't flood) 
label variable w_flood_depth "weighted avg flood depth across the developed block (no flooding = 0)"
su w_flood_depth, detail
g w_flood_depth_temp = w_flood_depth
replace w_flood_depth_temp=. if w_flood_depth==0
xtile w_flood_depth_tempQ = w_flood_depth_temp, n(3) // Cuts terciles after excluding the zeros
replace w_flood_depth_tempQ=0 if w_flood_depth==0    // then overwrites the missing with a new quartile labelled "0"
* treatment dummies
g i1_w_flood_depth = (w_flood_depth_tempQ==0 )  // dummy that equals 1 if less than 5% of the block flooded
g i2_w_flood_depth = (w_flood_depth_tempQ==1 ) 
g i3_w_flood_depth = (w_flood_depth_tempQ==2 ) 
g i4_w_flood_depth = (w_flood_depth_tempQ==3 ) 
label variable i1_w_flood_depth "(1) 0 wavg flood depth across dev"
label variable i2_w_flood_depth "(2) Q1 wavg flood depth across dev"
label variable i3_w_flood_depth "(3) Q2 wavg flood depth across dev"
label variable i4_w_flood_depth "(4) Q3 wavg flood depth across dev"
* treatement group
g group_w_flood_depth = "" 
replace group_w_flood_depth = "(1) 0" if i1_w_flood_depth==1
replace group_w_flood_depth = "(2) Q1" if i2_w_flood_depth==1
replace group_w_flood_depth = "(3) Q2" if i3_w_flood_depth==1
replace group_w_flood_depth = "(4) Q3" if i4_w_flood_depth==1
label variable group_w_flood_depth "bins of wavg flood depth across dev"
* make a numbered version
g ngroup_w_flood_depth = .
replace ngroup_w_flood_depth = 1 if i1_w_flood_depth==1
replace ngroup_w_flood_depth = 2 if i2_w_flood_depth==1
replace ngroup_w_flood_depth = 3 if i3_w_flood_depth==1
replace ngroup_w_flood_depth = 4 if i4_w_flood_depth==1
label variable ngroup_w_flood_depth "bins of wavg flood depth across dev"
drop w_flood_depth_temp w_flood_depth_tempQ
tab group_w_flood_depth
tab group_w_flood_depth, summarize(w_flood_depth)

/*Portion of developed Block land area that has been developed AND is in a floodplain  */
/* (FP Area & Dev Area / Dev Area) */
* build the continous interactor
g share_fpdev_devarea = Tdev_FP / Tdev_area
label variable share_fpdev_devarea "Floodplain share of developed area (FP Area & Dev Area) / Dev Area"
* converting to binary 1 = if over 25% of developed land is in the flood plain 
su share_fpdev_devarea, detail
g i25pct_share_fpdev_devarea=.
replace i25pct_share_fpdev_devarea=0 if share_fpdev_devarea<.25
replace i25pct_share_fpdev_devarea=1 if share_fpdev_devarea>=.25
label variable i25pct_share_fpdev_devarea "indicator that over 25% of dev land is in floodplain"
* converting to binary 1 = if over 50% of developed land is in the flood plain (roughly 85th percentile)
g i50pct_share_fpdev_devarea=.
replace i50pct_share_fpdev_devarea=0 if share_fpdev_devarea<.5
replace i50pct_share_fpdev_devarea=1 if share_fpdev_devarea>=.5
label variable i50pct_share_fpdev_devarea "indicator that over 50% of dev land is in floodplain"
* make a population density variable
g density = tot_pop/total_block_area
label variable density "persons per sq meter"
* ADJUSTING THE DEMOGRAPHIC VARIABLES IN THE GIS DATA
g share_college_degree = estimatetotalbachelorsdegree/estimatetotal // block group level share
su share_college_degree, detail
g share_poverty = estimateincomeinthepast12monthsb/estimatetotal // block group level share
su share_poverty, detail
g share_black=(owneroccupiedhouseholderwhoisbla+renteroccupiedhouseholderwhoisbl)/tot_hse
su share_black, detail
g share_white=(owneroccupiedhouseholderwhoiswhi+renteroccupiedhouseholderwhoiswh)/tot_hse
su share_white, detail
g share_owner_occupied = owneroccupied/tot_hse
su share_owner_occupied, detail
renpfix estimate // removes the word "estimate" from the prefix of variable names to try to shorten the name
*g minority_share = 1-share_white
egen median_minority_share = xtile(minority_share), nq(2)  // cuts the data at the median of average block income (1- below; 2=above)
g high_minority_share = (median_minority_share==2)  // creats a binary version 
drop median_minority_share
* more demographic dummies
egen median_share_college_degree = xtile(share_college_degree), nq(2)  // cuts the data at the median of average block income (1- below; 2=above)
g high_share_college_degree = (median_share_college_degree==2)  // creats a binary version 
g low_share_college_degree = (median_share_college_degree==1)  // creats a binary version 
drop median_share_college_degree
*Cut 2017q2 values of block income and share dev flood plain into medians before merging with CCP
egen median_share_owner_occupied = xtile(share_owner_occupied), nq(2)  // cuts the data at the median of riskscore (1- below; 2=above)
g high_owneroccupied = (median_share_owner_occupied==2)  // creats a binary version 
drop median_share_owner_occupied
egen median_blockincome = xtile(medianhouseholdincomeint), nq(2)  // cuts the data at the median of riskscore (1- below; 2=above)
g high_blockincome = (median_blockincome==2)  // creats a binary version 
g low_blockincome = (median_blockincome==1)  
drop median_blockincome
egen median_houseprice = xtile(medianvaluedollars), nq(2)  // cuts the data at the median of average block income (1- below; 2=above)
g high_blockhouseprice = (median_houseprice==2)  // creats a binary version 
drop median_houseprice
* Divide blocks by flp status
g fp_in_devblock = (!missing(share_fpdev_devarea) & share_fpdev_devarea>0)
g fp_in_50pct_devblock = (!missing(share_fpdev_devarea) & share_fpdev_devarea>.50) // changed to a quarter due to too few people not in hardship living in the floodplain
egen median_sharefpdevarea = xtile(share_fpdev_devarea), nq(2)  // cuts the data at the median of riskscore (1- below; 2=above)
g high_sharefpdevarea = (median_sharefpdevarea==2)  // creats a binary version 
drop median_sharefpdevarea
rename income avg_income_hmda
rename minority minority_share_hmda
rename floodinsurance floodins_share_femareg
* Test whether results change when Fort Bend County (County = 157) is dropped (since the flood depth information was missing for portions of the county)
*drop if county==157
* saving the altered data so that it can be used later in its altered form
save "$inny/gis_edited" , replace



/* ----------------------------------------------------------------*/
/* FEDERAL RESERVE BANK OF NY/EQUIFAX CONSUMER CREDIT PANEL DATA---*/
/* ----------------------------------------------------------------*/
/* NOTE: This is PSUEDO-data, it is NOT the actual NYFED/Equifax Consumer Credit Panel data (CCP) 
used for the analysis featured within the paper. The actual data is proprietary and cannot be 
released externally. This data is generated from the empirical distributions of the actual student 
debt information in the CCP for people with student debt. 

After row 678524, we simply generate zero balance observations that reflect the fact that not 
everyone has student debt. The pseudo data is not representative of the underlying data for people 
without student debt. 

The psudo data also does not include extranious credit variables that are not of primary interest 
(e.g. mortgage balance). It will give a general sense of how each student debt variable is 
distributed in the aggregate for people with student debt. 

This data is not designed to replicate the actual results in the paper. The data is designed to
"illustrate the format of the files read by the code so that users can better understand the code." 
per the RFS code sharing policy established in July 1, 2020. */

/*
Note in the real dataset, we use loan-level disaggregated student loan data for each consumer (up to a
maximum of 20 loans per consumer, per quarter) and aggregate after the fact,
rather than import the aggregated data from the main CCP dataset. This offers a
more accurate accounting because the student loan data in the main CCP dataset 
is missing certain types of student loans. While this approach obscures
information for individuals with greater than 20 student loan accounts, the
inperson_idence of such cases is low. 
*/

* Import loan-level student loan data
import delim using "$inny/Psuedo Student Loan Data.csv", stringcols(1 2 3 4 5) clear
sort person_id date
g state_code = "48"
g tract_block = state_code + county_code + census_tract_2010 + census_block_2010
g full_tract = state_code + county_code + census_tract_2010

*GIS IS MERGED TO CCP
/* NOTE: The psuedo data has far less geographic coverage than the analytic data. */
merge m:1 tract_block  using "$inny/gis_edited" //, force
drop if _merge == 2  // we can, however, drop any GIS blocks that don't have any CCP obs


/* translate the CCP qtr into a STATA date format*/
gen qtr2 = date(qtr, "MDY")
format qtr2 %td
* a better format for graphs
gen yq = qofd(qtr2)
format yq %tq
gen year = substr(date, 1, 4)
* drop extreme ages (dead people and people too young to be considering college)
 drop if age < 15 | age > 100 

sort person_id qtr2
by person_id: gen nqtr = [_N] // count the number of quarters that each person_id has observations for
egen max_nqtr = max(nqtr) // calculate the maximum number of quarters per person_id in our dataset

* CCP variable names have been changed in order to protect the codebook (as requested by Equifax)
/* create the outcome variables of interest and give them labels */
* Equifax Risk Score 3.0
label variable riskscore "Equifax Risk Score 3.0"

*student debt balance
*replace balance = 0 if missing(balance) 
label variable balance "Student Bal"

/* NOTE: For simplicity, the psuedo data only include student loan holders. In
the analytic dataset there are both student loan borrowers and nonborrowers. */
* indicator for non-zero balance
g ibalance = (!missing(balance) & balance > 0) * 100
label variable ibalance "I(Student debt>0)"

/* NOTE: Pseudo data is winsorized prior during psuedo data generation.
The CCP data student loan balances were winsorized using this code at the 98th percentile of non-missing values by quarter
to deal with extreme positive values as detailed in a footnote of the paper */
// foreach b of varlist ///
// 	balance  {
// 	winsor2 `b' if !missing(`b') & `b' > 0 , by(qtr2) cuts(0 98) replace // winsorizing the top 2% of non-zero balances by date
// 	replace `b' = 0 if missing(`b') // replacing missing balances with zeros
// }

save "$inny/tempSTUD2", replace  


/*--------------------------------------------------------------*/
/* CREATE A TREATMENT ID DATASET */
/* record and save the IDs of  people living in a houston census block as of the 
last quarter before the hurricane */
use "$inny/tempSTUD2", clear
keep if _merge == 3 // IF the person_id is in a Houston GIS block
drop _merge
keep if qtr2 == mdy(6,1,2017)  
drop qtr2
*Condition the treatment sample on the ages of interest as of the dawn of the hurricane q2 2017
drop if age < 18 | age > 100 
*make sure the block is not missing any important census demographics
keep if !missing(medianhouseholdincomeint)
g riskscore_2017q2 = riskscore
g balance_2017q2 = balance
g ibalance_2017q2 = ibalance/100
egen balance_2017q2MED = xtile(balance_2017q2) if balance_2017q2 > 0 , nq(2)  
*  age cut 1 (<24 is primary group of interest)
g age_cut = .
replace age_cut=1 if age>=18 & age<24  // 18-23 year olds 
replace age_cut=2 if age>=24 & age<30  // 24-29 year olds
replace age_cut=3 if age>=30 & age<35  // 30-34 year olds
replace age_cut=4 if age>=35 & age<50  // 35-49 year olds
replace age_cut=5 if age>=50 // 50 and over
replace age_cut = . if missing(age)
* age cut 2 (<26 is primary group of interest for more restricted hazard sample that drops people after the period in which they open a student loan account) 
g age__cut = .
replace age__cut=1 if age<26
replace age__cut=2 if age>=26 & age<45 
replace age__cut=3 if age>=45
replace age__cut = . if missing(age)
*Cut 2017q2 values of creditscore into medians before remerging with CCP
* cuts the data at the median of riskscore (1= below; 2=above)
egen median_riskscore = xtile(riskscore), nq(2)  
g high_riskscore_2017q2 = (median_riskscore == 2)  // creats a binary version 
g low_riskscore_2017q2 = (median_riskscore == 1)
replace high_riskscore_2017q2 = . if missing(median_riskscore)
replace low_riskscore_2017q2 = . if missing(median_riskscore)
drop median_riskscore
*Take the block level variables and make the 2017q2 specific for individuals
g high_minority_share_2017q2 = high_minority_share
g high_owneroccupied_2017q2 = high_owneroccupied
g high_blockincome_2017q2  = high_blockincome
g low_blockincome_2017q2  = low_blockincome
g high_blockhouseprice_2017q2  = high_blockhouseprice
g fp_in_devblock_2017q2  = fp_in_devblock
g fp_in_50pct_devblock_2017q2  = fp_in_50pct_devblock
g high_sharefpdevarea_2017q2  = high_sharefpdevarea
g medianhouseholdincome_2017q2 = medianhouseholdincomeint
g minority_share_2017q2 = minority_share
g share_college_degree_2017q2 = share_college_degree
g share_owner_occupied_2017q2 = share_owner_occupied
g medianvaluedollars_2017q2 = medianvaluedollars
g share_fpdev_devarea_2017q2 = share_fpdev_devarea
g high_share_college_degree_2017q2 = high_share_college_degree
g low_share_college_degree_2017q2 = low_share_college_degree
* constrained just means low income (using the word constrained as a sort of macro variable that can be easily changed here without having to change all the specifications)
g constrained_2017q2 = .
replace constrained_2017q2 = 1 if low_blockincome_2017q2==1  
replace constrained_2017q2 = 0 if low_blockincome_2017q2==0
/* keep only the relevant GIS variables */
keep person_id  tract_block-constrained_2017q2
/* this is block the person was in at the dawn of the hurricane (we will cluster our standard errors on this) */
rename tract_block tract_block_2017q2
rename TRACTCE10 tract_2017q2
* test for duplicates
sort person_id
drop if missing(person_id) /* 0 observations dropped */
quietly by person_id: gen dup = cond(_N == 1,0,_n)
drop if dup > 1 /* 0 observations dropped */
drop dup
save "$inny/treatedIDsSTUD" , replace



/*--------------------------------------------------------------*/
/* CREATE A MEASURE OF INCREASING STUDENT DEBT PRE-HURRICANE (indicates ongoing schooling, NOT USED) */
use "$inny/tempSTUD2", clear
keep if _merge == 3 // IF the person_id is in a Houston GIS block
drop _merge
keep if qtr2 == mdy(6,1,2016)  
drop qtr2
*make sure the block is not missing any important census demographics
keep if !missing(medianhouseholdincomeint) 
g balance_2016q2 = balance
replace balance_2016q2=0 if missing(balance_2016q2)
keep person_id  balance_2016q2
*drop duplicates
sort person_id
drop if missing(person_id) /* 0 observations dropped */
quietly by person_id: gen dup = cond(_N == 1,0,_n)
drop if dup > 1 /* 0 observations dropped */
drop dup
save "$inny/treatedIDs_2016q2" , replace
* MERGE with Q22017 treatmentID dataset
merge m:1  person_id  using "$inny/treatedIDsSTUD" //, force
keep if _merge==3 | _merge==2   
drop _merge
g istud_increase = (balance_2017q2>balance_2016q2 & !missing(balance_2017q2))
save "$inny/treatedIDsSTUD" , replace
/*--------------------------------------------------------------*/


*---------------------------------------------------------------
* CREATE A BLOCK-LEVEL Q2 2017 MEDIAN EQUIFAX RISK SCORE AND TOTAL STUDENT DEBT MEASURE (NOT USED IN ANALYSIS)
use "$inny/treatedIDsSTUD" , clear
g persons=1 
collapse (median) medianblock_riskscore_2017q2 = riskscore_2017q2 medblock_balance_2017q2 = balance_2017q2 ///
		(mean) meanblock_riskscore_2017q2 = riskscore_2017q2 meanblock_balance_2017q2 = balance_2017q2 meanblock_constrained_2017q2 = constrained_2017q2  ///
		(sum) sumblock_constrained_2017q2 = constrained_2017q2 persons, by(tract_block_2017q2) 
*riskscore indicator by block
egen g_medianblock_riskscore_2017q2 = xtile(medianblock_riskscore_2017q2), nq(2)  // cuts the data at the median of riskscore (1- below; 2=above)
g high_block_riskscore_2017q2 = (g_medianblock_riskscore_2017q2==2)  // creates a binary version 
replace high_block_riskscore_2017q2 = . if missing(g_medianblock_riskscore_2017q2)
drop g_medianblock_riskscore_2017q2
*constraint indicator by block
egen g_medianblock_constrained_2017q2 = xtile(meanblock_constrained_2017q2), nq(2)  // cuts the data at the median of riskscore (1- below; 2=above)
g highblock_constrained_2017q2 = (g_medianblock_constrained_2017q2==2)  // creats a binary version 
replace highblock_constrained_2017q2 = . if missing(g_medianblock_constrained_2017q2)
drop g_medianblock_constrained_2017q2
save "$inny/blockmediancreditSTUD" , replace
*--------------------------------------------------------------

/*--------------------------------------------------------------*/
/* Get the mean CCP variable of each block during the pre-period, then merge it in based on whatever block the person lived in as of 2017q2 (NOT USED IN ANALYSIS) */
use "$inny/tempSTUD2" , clear
keep if qtr2 <= mdy(6,1,2017)  
drop if missing(tract_block) 
drop if missing(person_id) 
preserve
collapse(max) meanblock_riskscore_pre = riskscore  ///
		meanblock_balance_pre = balance, by(tract_block) 
rename tract_block tract_block_2017q2
save "$inny/premeancreditSTUD" , replace
restore
collapse(max) riskscore_pre = riskscore   ///
		 balance_pre = balance , by(person_id) 
save "$inny/preindivcreditSTUD" , replace
/*--------------------------------------------------------------*/


/*--------------------------------------------------------------*/
* MAIN DATASET
* MERGE TREATED person_id + Q22017 GIS DATA ONTO EACH person_id IN THE CCP
use "$inny/tempSTUD2" , clear
drop _merge
drop STATEFP10-high_sharefpdevarea  // drop the old GIS data so that the 2017q2-specific data can be merged back in since we do not want a bad controls situation
* merge treatedIDs into the original ccp-gis data
merge m:1 person_id  using "$inny/treatedIDsSTUD" 
keep if _merge == 3  // ONLY KEEP person_ids that were in the treated (possible affected by the hurriane)
drop _merge
merge m:1 person_id using "$inny/preindivcreditSTUD" 
keep if _merge == 3  
drop _merge
* merge in 2017q2 block-level median credit variables
merge m:1 tract_block_2017q2 using "$inny/blockmediancreditSTUD" 
keep if _merge == 3  
drop _merge
* merge in pre period block-level median credit variables
merge m:1  tract_block_2017q2  using "$inny/premeancreditSTUD" 
keep if _merge==3  
drop _merge
sort person_id qtr2 
* TIME DUMMIES FOR USE IN THE REGRESSIONS
g POSTt =  (qtr2 > mdy(6,1,2017))
g POSTt_yr1 =  (qtr2 > mdy(6,1,2017) & qtr2 <= mdy(6,1,2018))
g POSTt_yr23 =  (qtr2 > mdy(6,1,2018) )
* must create year quarter dummies
	gen d2015q1 = (qtr == "2015-03-01")
	gen d2015q2 = (qtr == "2015-06-01")
	gen d2015q3 = (qtr == "2015-09-01")
	gen d2015q4 = (qtr == "2015-12-01")
	gen d2016q1 = (qtr == "2016-03-01")
	gen d2016q2 = (qtr == "2016-06-01")
	gen d2016q3 = (qtr == "2016-09-01")
	gen d2016q4 = (qtr == "2016-12-01")
	gen d2017q1 = (qtr == "2017-03-01")
	gen d2017q2 = (qtr == "2017-06-01")
	gen d2017q3 = (qtr == "2017-09-01")
	gen d2017q4 = (qtr == "2017-12-01")
	gen d2018q1 = (qtr == "2018-03-01")
	gen d2018q2 = (qtr == "2018-06-01")
	gen d2018q3 = (qtr == "2018-09-01")
	gen d2018q4 = (qtr == "2018-12-01")
	gen d2019q1 = (qtr == "2019-03-01")
	gen d2019q2 = (qtr == "2019-06-01")
	gen d2019q3 = (qtr == "2019-09-01")
	gen d2019q4 = (qtr == "2019-12-01")
	gen d2020q1 = (qtr == "2020-03-01")
	gen d2020q2 = (qtr == "2020-06-01")
	gen d2020q3 = (qtr == "2020-09-01")
	gen d2020q4 = (qtr == "2020-12-01")
	gen d2021q1 = (qtr == "2021-03-01")
	gen d2021q2 = (qtr == "2021-06-01")
* generate age cubic and engineering cubic
g age_2 = age*age
g age_3 = age*age*age
g share_Tarea_FP_2 = share_Tarea_FP*share_Tarea_FP
g share_Tarea_FP_3 = share_Tarea_FP_2*share_Tarea_FP
g share_dev_area_2 = share_dev_area*share_dev_area
g share_dev_area_3 = share_dev_area*share_dev_area*share_dev_area
g elevation_ft_2 = elevation_ft*elevation_ft
g elevation_ft_3 = elevation_ft_2*elevation_ft
g dist_stream_ft_2 = dist_stream_ft*dist_stream_ft
g dist_stream_ft_3 = dist_stream_ft_2*dist_stream_ft
g share_fpdev_devarea_2 = share_fpdev_devarea*share_fpdev_devarea
g share_fpdev_devarea_3 = share_fpdev_devarea*share_fpdev_devarea*share_fpdev_devarea
* because reghdfe program can't handle strings
g tract_block_2017q2_ = real(tract_block_2017q2)
egen long person_id_ = group(person_id)
egen long ngroup_w_flood_depth_ = group(ngroup_w_flood_depth)
 /*block x post interactions */
g POSTt_x_income = POSTt*medianhouseholdincomeint
g POSTt_x_lo_income = POSTt*low_blockincome_2017q2
g POSTt_x_ownerocc = POSTt*share_owner_occupied
g POSTt_x_hi_ownerocc = POSTt*high_owneroccupied_2017q2 
g POSTt_x_minority = POSTt*minority_share 
g POSTt_x_hi_minority = POSTt*high_minority_share_2017q2
g POSTt_x_density = POSTt*density
g POSTt_x_homeval = POSTt*medianvaluedollars
g POSTt_x_FLP = POSTt*share_fpdev_devarea
g POSTt_x_riskscore = POSTt*medianblock_riskscore_2017q2
g POSTt_x_hi_college = POSTt*high_share_college_degree_2017q2
g POSTt_x_lo_college = POSTt*low_share_college_degree_2017q2
/* block x date interactions */
g income = medianhouseholdincomeint
g ownerocc = share_owner_occupied
g minority = minority_share
g homeval = medianvaluedollars
g FLP = share_fpdev_devarea
* keep 6 quarters pre hurricane
drop if qtr2 < mdy(12,01,2015)  
save "$inny/ccp_gis_prepped_data_0STUD" , replace
/*--------------------------------------------------------------*/



/*--------------------------------------------------------------*/
* CREATE A COLLAPSED BLOCK-LEVEL MIGRATION DATASET 
* (Only used to form the block-level variables used in the descriptive statistics (Table 1, Table 2.A) and the out-migration estimates referenced in the text
use "$inny/tempSTUD2", clear
cap drop date0
*count the number of sample people living in the block during a given quarter
preserve
* everybody gets a value of 1 that will be cumulated to the block level to calculate sample by block
g person = 1
collapse (sum) person, by(tract_block qtr2)  
sort tract_block qtr2
save "$inny/tempSTUD_1", replace  
restore
* Then we want to find how many of those people moved out of houston
keep person_id qtr2 tract_block county_code  
* create variables capturing the geographic location of the person_id during the prior quarter
tsset, clear
egen long person_id_ = group(person_id)
* tell stata that person_id is the id variable and qtr2 is the time variable
tsset person_id_ qtr2
sort person_id_ qtr2 
* find the block the person lived in during the prior quarter
by person_id_: gen lag_tract_block = tract_block[_n-1]
* find the county the person lived in during the prior quarter
by person_id_: gen lag_county_code = county_code[_n-1]
* lable the prior quarter "date0"
by person_id_: gen date0 = qtr2[_n-1]
format date0 %td
* make sure that the lag is set to missing for the first quarterly observation for a given person_id
replace lag_tract_block = "" if missing(date0)
replace lag_county_code = "" if missing(date0)
order person_id qtr2 date0 tract_block lag_tract_block county_code lag_county_code 
sort person_id qtr2 
* identify people who were in houston last quarter and now left houston
g moved_outhouston = 0
replace moved_outhouston = 1 if ///
	!inlist(county_code, "015", "039", "071", "157", "167", "201", "291", "339", "473") & ///
	inlist(lag_county_code, "015", "039", "071", "157", "167", "201", "291", "339", "473") 
g moved_wihouston = 0
replace moved_wihouston = 1 if lag_tract_block != tract_block & ///
	inlist(county_code, "015", "039", "071", "157", "167", "201", "291", "339", "473") & ///
	inlist(lag_county_code, "015", "039", "071", "157", "167", "201", "291", "339", "473") 
* shift these people into the block and quarter prior to their move
drop tract_block qtr2
rename lag_tract_block tract_block
rename date0 qtr2
drop if missing(qtr2)
cap drop date0
* sum up all the people that moved by the block and quarter they were in prior to their move
collapse (sum) moved_outhouston moved_wihouston, by(tract_block qtr2) 
sort tract_block qtr2
* Calculate the share of the block that moved out (either within Houston or out of Houston)
merge m:1 tract_block qtr2 using "$inny/tempSTUD_1" 
keep if _merge == 3
drop _merge 
* now sum up the number of people who move out of the block the next quarter
g p_outhoustonmig = (moved_outhouston/person)*100 // quarterly migration rate of houston
label variable p_outhoustonmig "block quarterly out-houston migration rate (%)"
g p_inhoustonmig = (moved_wihouston/person)*100  // quarterly migration rate of the block
label variable p_inhoustonmig "block quarterly in-houston migration rate (%)"
drop moved_outhouston  moved_wihouston
save "$inny/block_migration_dataSTUD", replace
use "$inny/tempSTUD2", clear
gen person = 1 // everybody gets a 1, then the 1s are summed to the block-date level
collapse (sum) person  (mean) riskscore  ///
	 balance , by(tract_block qtr2) 
save "$inny/block_90plus_dataSTUD", replace
*MERGE THE TWO BLOCK LEVEL DATASETS TOGETHER
merge m:1 tract_block qtr2 using "$inny/block_migration_dataSTUD" 
drop _merge
* now merge in treatment variables for each houston block. 
* THIS IS WHERE WE DUMP ALL BLOCKS THAT ARE NOT IN HOUSTON
merge m:1 tract_block using "$inny/gis_edited" 
keep if _merge == 3  // keeping only houston blocks (by virtue of being in the GIS data)
drop _merge
* merge in block-level median credit variables
g tract_block_2017q2 = tract_block
merge m:1  tract_block_2017q2  using "$inny/blockmediancreditSTUD"
keep if _merge == 3  
drop _merge
drop tract_block_2017q2
egen tract_block_ = group(tract_block), label 
sort tract_block qtr2
quietly by tract_block qtr2: gen dup2 = cond(_N==1,0,_n)
tabulate dup2
sort tract_block_ qtr2
quietly by tract_block_ qtr2: gen dup = cond(_N==1,0,_n)
tabulate dup
tsset, clear
tsset tract_block_ qtr2
sort tract_block_ qtr2 
by tract_block_: gen date0 = qtr2[_n+1]  
format date0 %td
drop qtr2
rename date0 qtr2 
cap drop date0
* FOR REGRESSIONS
gen yq = qofd(qtr2)
format yq %tq
g POSTt =  (qtr2 > mdy(6,1,2017))
* must create year quarter dummies for regression	
	gen d2015q1 = (qtr2 == mdy(3, 1, 2015))
	gen d2015q2 = (qtr2 == mdy(6, 1, 2015))
	gen d2015q3 = (qtr2 == mdy(9, 1, 2015))
	gen d2015q4 = (qtr2 == mdy(12, 1, 2015))
	gen d2016q1 = (qtr2 == mdy(3, 1, 2016))
	gen d2016q2 = (qtr2 == mdy(6, 1, 2016))
	gen d2016q3 = (qtr2 == mdy(9, 1, 2016))
	gen d2016q4 = (qtr2 == mdy(12, 1, 2016))	
	gen d2017q1 = (qtr2 == mdy(3, 1, 2017))
	gen d2017q2 = (qtr2 == mdy(6, 1, 2017))
	gen d2017q3 = (qtr2 == mdy(9, 1, 2017))
	gen d2017q4 = (qtr2 == mdy(12, 1, 2017))
	gen d2018q1 = (qtr2 == mdy(3, 1, 2018))
	gen d2018q2 = (qtr2 == mdy(6, 1, 2018))
	gen d2018q3 = (qtr2 == mdy(9, 1, 2018))
	gen d2018q4 = (qtr2 == mdy(12, 1, 2018))
	gen d2019q1 = (qtr2 == mdy(3, 1, 2019))
	gen d2019q2 = (qtr2 == mdy(6, 1, 2019))
	gen d2019q3 = (qtr2 == mdy(9, 1, 2019))
	gen d2019q4 = (qtr2 == mdy(12, 1, 2019))
	gen d2020q1 = (qtr2 == mdy(3, 1, 2020))
	gen d2020q2 = (qtr2 == mdy(6, 1, 2020))
	gen d2020q3 = (qtr2 == mdy(9, 1, 2020))
	gen d2020q4 = (qtr2 == mdy(12, 1, 2020))
	gen d2021q1 = (qtr2 == mdy(3, 1, 2021))
	gen d2021q2 = (qtr2 == mdy(6, 1, 2021))
g high_owneroccupied_2017q2 = high_owneroccupied
g high_blockincome_2017q2  = high_blockincome
g high_minority_share_2017q2  = high_minority_share
g high_share_college_degree_2017q2 = high_share_college_degree
g high_blockhouseprice_2017q2  = high_blockhouseprice
g fp_in_devblock_2017q2  = fp_in_devblock
g fp_in_50pct_devblock_2017q2  = fp_in_50pct_devblock
g high_sharefpdevarea_2017q2  = high_sharefpdevarea
g share_Tarea_FP_2 = share_Tarea_FP*share_Tarea_FP
g share_Tarea_FP_3 = share_Tarea_FP_2*share_Tarea_FP
g share_dev_area_2 = share_dev_area*share_dev_area
g share_dev_area_3 = share_dev_area*share_dev_area*share_dev_area
g elevation_ft_2 = elevation_ft*elevation_ft
g elevation_ft_3 = elevation_ft_2*elevation_ft
g dist_stream_ft_2 = dist_stream_ft*dist_stream_ft
g dist_stream_ft_3 = dist_stream_ft_2*dist_stream_ft
g share_fpdev_devarea_2 = share_fpdev_devarea*share_fpdev_devarea
g share_fpdev_devarea_3 = share_fpdev_devarea*share_fpdev_devarea*share_fpdev_devarea
/*block x post interactions */
g POSTt_x_income = POSTt*medianhouseholdincomeint
g POSTt_x_ownerocc = POSTt*share_owner_occupied
g POSTt_x_minority = POSTt*minority_share
g POSTt_x_density = POSTt*density
g POSTt_x_homeval = POSTt*medianvaluedollars
g POSTt_x_FLP = POSTt*share_fpdev_devarea
g POSTt_x_college = POSTt*high_share_college_degree
/*block x quarter interactions */
g income = medianhouseholdincomeint
g ownerocc = share_owner_occupied
g minority = minority_share
g homeval = medianvaluedollars
g FLP = share_fpdev_devarea
* show 6 quarters pre 
drop if qtr2 < mdy(12,01,2015)  
save "$inny/block_level_dataSTUD" , replace
/*--------------------------------------------------------------*/





/* +++ */
/*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
MACRO VARIABLES
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
set matsize 11000
global score_supply riskscore    
global balance_outcomes  balance  
global ext_margin ibalance

*TREATMENT VARIABLES:
global treatments       ngroup_w_flood_depth     
global itreatments   	i4_w_flood_depth   	
*CONTROL VARIABLES
global BCHAR_x_POST POSTt_x_income  POSTt_x_ownerocc  POSTt_x_density POSTt_x_minority POSTt_x_homeval POSTt_x_FLP 
global AGECUBICit   age_2 
global FE0    	    person_id_ qtr2
* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!list of CCP variables of interest!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
/*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
	
	
	

/* +++ 
Note that the descriptive stats in the paper includes some measures -- like bankrupty, delinquency, mortgage balance -- that are not in the pseudo data and are thus dropped here*/
/*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
DESCRIPTIVE STATISTICS
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
/* +++ */
/* TABLE 2.A BLOCK LEVEL DESCRIPTIVE STATS ------------------------------------------- */
use "$inny/block_level_dataSTUD" , clear
keep if qtr2 == mdy(6,1,2017)
foreach binvar of varlist  ngroup_w_flood_depth   {	 // ngroup_avg_flood_depth
preserve
collapse (count) number_of_blocks = tractblock /* <<-just counts the number of non missing tractblock obs  */  ///
		(mean)  w_flood_depth    avg_share_of_devarea_flooded = share_dev_anyfl  ///
				avg_flood_depth_of_floodedarea = avg_flood_depth share_dev_area avg_share_of_devarea_in_fp=share_fpdev_devarea /// 
				share_Tarea_FP elevation=elevation_ft dist_stream=dist_stream_ft     ///
				medianhouseholdincomeint  medianvaluedollars   ///  
				share_owner_occupied share_poverty share_college_degree minority_share   /// 
				 p_outhoustonmig  p_inhoustonmig   ///
					if  !missing( `binvar' ), by( `binvar' )  
		xpose, clear  varname  // this transposes it long
		order _varname  // makes the variables the first row
		replace _varname = "Flood Depth Group" if _varname == "ngroup_avg_flood_depth" 
		replace _varname = "Num. of blocks" if _varname == "number_of_blocks" 	
		replace _varname = "W.Avg. flood depth across dev. area (ft)" if _varname == "w_flood_depth" 
		replace _varname = "% of dev. area that flooded" if _varname == "avg_share_of_devarea_flooded" 
		replace _varname = "Avg. flood depth of flooded area (ft)" if _varname == "avg_flood_depth_of_floodedarea" 
		replace _varname="% of total area that is developed" if _varname == "share_dev_area" 	
		replace _varname = "% of dev. area in floodplain" if _varname == "avg_share_of_devarea_in_fp" 
		replace _varname = "% of tot. area in floodplain" if _varname == "share_Tarea_FP" 
		replace _varname = "Elevation (ft)" if _varname == "elevation" 
		replace _varname = "Distance to stream (ft)" if _varname == "dist_stream" 
		replace _varname = "Med. household income ($)" if _varname == "medianhouseholdincomeint" 
		replace _varname = "Owner occupied (%)" if _varname == "share_owner_occupied" 
		replace _varname = "Med. home value ($)" if _varname == "medianvaluedollars" 
		replace _varname = "Poverty (%)" if _varname == "share_poverty" 
		replace _varname = "College degree (%)" if _varname == "share_college_degree" 
		replace _varname = "Minority share (%)" if _varname == "minority_share" 
		replace _varname = "Out-Houston migration rate" if _varname == "p_outhoustonmig" 
		export excel using "$outty/Table1BLOCKGIS_`binvar'_Q22017.xlsx" ,  replace // firstrow(varlabels)
restore
}
* Q22017: Mean, SD, p10, p50, Min, Max  (not by treatment bin)
eststo clear
eststo: estpost su  w_flood_depth share_dev_anyfl avg_flood_depth share_fpdev_devarea share_Tarea_FP elevation_ft dist_stream_ft ///
					medianhouseholdincomeint  medianvaluedollars   share_owner_occupied share_poverty share_college_degree minority_share	///
					 p_outhoustonmig     ///
					, detail
esttab using "$outty/Table1BLOCKGIS_min_max_Q22017.csv", replace cells("mean(pattern(1 1) fmt(2)) sd(pattern(1 1))  min(pattern(1 1 1)) p10(pattern(1 1 1)) p50(pattern(1 1 1)) p90(pattern(1 1 1))   max(pattern(1 1 1))") label    title("Min Max GIS Data Matched to CCP Data Q22017")



/* TABLE 2.B INDIVIDUAL LEVEL DESCRIPTIVE STATS ------------------------------------------- */
use "$inny/ccp_gis_prepped_data_0STUD" , clear
set matsize 1100
* flood bin mean
foreach binvar of varlist  ngroup_w_flood_depth   {	
		eststo clear
		sort `binvar'
		by `binvar': eststo: estpost su  $score_supply  $balance_outcomes   $ext_margin    if qtr2 == mdy(6,1,2017) & !missing(`binvar')
		esttab using "$outty/Table1CREDIT_Q22017.csv", replace cells("mean(pattern(1 1) fmt(2))") label    title("Credit Data Summary Statistics: Means by treatment group Q22017-Hurricane")

		eststo clear
		eststo: estpost su  $score_supply  $balance_outcomes   $ext_margin    if qtr2 == mdy(6,1,2017)  , detail
		esttab using "$outty/Table1CREDIT_min_max_Q22017.csv", replace cells("mean(pattern(1 1) fmt(2)) sd(pattern(1 1))  min(pattern(1 1 1)) p10(pattern(1 1 1)) p50(pattern(1 1 1)) p90(pattern(1 1 1))   max(pattern(1 1 1))") label    title("Credit Data Summary Statistics Q22017-Hurricane")
}

* Treatment
* INDIVIDUAL-LEVEL PRE VS. POST
*use "$inny/ccp_gis_prepped_data_0" , clear
keep if !missing(w_flood_depth)
g post_ = .
replace post_ = 0 if qtr2 <= mdy(6,1,2017)
replace post_ = 1 if qtr2 > mdy(6,1,2017) 
* Min Max version, pre vs. post not by treatment bin
eststo clear
sort post_
by post_ :eststo: estpost su   $score_supply  $balance_outcomes   $ext_margin  ///
				if !missing(post_)	, detail
esttab using "$outty/Table1CREDIT_min_max_PREv2yrPOST.csv", replace cells("mean(pattern(1 1) fmt(2)) sd(pattern(1 1))  min(pattern(1 1 1)) p10(pattern(1 1 1)) p50(pattern(1 1 1)) p90(pattern(1 1 1))   max(pattern(1 1 1))") label    title("Min Max GIS Data Matched to CCP Data pre-Hurricane")
/*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/



/* NOTE: pseudo data will start to bug out here because of the high dimensional fixed effects -- i.e., there 
        is not enough variation built in the pseudo data within each treatment bin. 
		Also, there are too few zero student debt balances in the pseudo data, making it harder to 
		estimate an extensive margin) */
/* +++ FIGURE 2 */
/*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
INDIVIDUAL EVENT STUDY GRAPHS
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
use "$inny/ccp_gis_prepped_data_0STUD" , clear
* FIXED EFFECT OPTIONS	
global  FE0    		 	person_id_ qtr2  
global clust			tract_block_2017q2_ qtr2
*All
global studentvarsS2 	ibalance // new_acct_per_inquiry icosigned_stud  << alternative variables used in Figure A5 and Figure A7.b
/*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
/*---------------------------------------------------------------------------------------------------------------------------------------------*/	
* EXTENSIVE
foreach Tk of varlist w_flood_depth     {
use "$inny/ccp_gis_prepped_data_0STUD" , clear
set matsize 11000
* Build the treatment x date variables;
	* first loop over years 2010-2021
	forvalues z = 15/20 {
	* second, loop over treatment groups 1-4 
	forvalues q = 1/4 {
	gen d20`z'q1_x_i`q' = i`q'_`Tk' * d20`z'q1
	gen d20`z'q2_x_i`q'= i`q'_`Tk' * d20`z'q2
	gen d20`z'q3_x_i`q' = i`q'_`Tk' * d20`z'q3
	gen d20`z'q4_x_i`q'= i`q'_`Tk' * d20`z'q4
	}
	* sum T1-T2 (i2+i3)
	gen d20`z'q1_x_i23 = d20`z'q1_x_i2 + d20`z'q1_x_i3 // + d20`z'q1_x_i4
	gen d20`z'q2_x_i23 = d20`z'q2_x_i2 + d20`z'q2_x_i3 // + d20`z'q2_x_i4
	gen d20`z'q3_x_i23 = d20`z'q3_x_i2 + d20`z'q3_x_i3 // + d20`z'q3_x_i4
	gen d20`z'q4_x_i23 = d20`z'q4_x_i2 + d20`z'q4_x_i3 // + d20`z'q4_x_i4
	}
	* because 2021 data ends after Q2, must do these two quarters mannually:
	gen d2021q1_x_i1 = i1_`Tk' * d2021q1
	gen d2021q1_x_i2 = i2_`Tk' * d2021q1
	gen d2021q1_x_i3 = i3_`Tk' * d2021q1
	gen d2021q1_x_i4 = i4_`Tk' * d2021q1
	gen d2021q2_x_i1= i1_`Tk' * d2021q2
	gen d2021q2_x_i2= i2_`Tk' * d2021q2
	gen d2021q2_x_i3= i3_`Tk' * d2021q2
	gen d2021q2_x_i4= i4_`Tk' * d2021q2
	* sum T1-T2 (i2+i3)
	gen d2021q1_x_i23 = d2021q1_x_i2 + d2021q1_x_i3 // + d2021q1_x_i4
	gen d2021q2_x_i23 = d2021q2_x_i2 + d2021q2_x_i3 // + d2021q2_x_i4
drop if nqtr<23 // enforces sample balance (robust to unbalanced: Figure A6)
foreach c in  1960   { 

/*-------------------------------------------------------------------------------------*/
/*-------AGE EFFECTS (extensive margin)------*/
* AGE
foreach r in 1 2 3 4 5 { 
* FIGURE 2.A
foreach depvar of varlist  $studentvarsS2 {
preserve
statsby _b _se , clear: quietly reghdfe `depvar' ///			 
 			d2015q4_x_i23 	 			 	d2015q4_x_i4	 ///						 
 			d2016q1_x_i23 	 			 	d2016q1_x_i4	///
 			d2016q2_x_i23 	 			 	d2016q2_x_i4	///
 			d2016q3_x_i23 	 			 	d2016q3_x_i4	///
 			d2016q4_x_i23 	 			 	d2016q4_x_i4	 ///			 
 			d2017q1_x_i23 	 			 	d2017q1_x_i4			/*	d2017q2_x_i23 	 			d2017q2_x_i3 	d2017q2_x_i4	 control */   ///
 			d2017q3_x_i23 	 			 	d2017q3_x_i4	///
 			d2017q4_x_i23 	 			 	d2017q4_x_i4	 ///			 
 			d2018q1_x_i23 	 			 	d2018q1_x_i4	///
 			d2018q2_x_i23 	 			 	d2018q2_x_i4	///
 			d2018q3_x_i23 	 			 	d2018q3_x_i4	///
 			d2018q4_x_i23 	 			 	d2018q4_x_i4	 ///			 
 			d2019q1_x_i23 	 			 	d2019q1_x_i4	///
 			d2019q2_x_i23 	 			 	d2019q2_x_i4	///
 			d2019q3_x_i23 	 			 	d2019q3_x_i4	///
 			d2019q4_x_i23 	 			 	d2019q4_x_i4	 ///		
			d2020q1_x_i23 	 			 	d2020q1_x_i4	///
 			d2020q2_x_i23 	 			 	d2020q2_x_i4	///
 			d2020q3_x_i23 	 			 	d2020q3_x_i4	///
 			d2020q4_x_i23 	 			 	d2020q4_x_i4	 ///	
			d2021q1_x_i23 	 			 	d2021q1_x_i4	///
 			d2021q2_x_i23 	 			 	d2021q2_x_i4	///		 	 	 	 
			$AGECUBICit $BCHAR_x_POST   if age_cut == `r' , absorb( $FE0     )	cluster( $clust ) 
keep  *x_i4* // keep only the coeffs of interest
 g _se_d2017q2_x_i4 = .
 g _b_d2017q2_x_i4 = 0
xpose, clear  varname  // this transposes it long
g beta_se = substr(_varname,1,3)  // keeps only the first 3 characters of the string
replace beta_se = subinstr(beta_se, "_", "",.) // removes the underbar
replace _varname = subinstr(_varname, "_b_", "",.) 
replace _varname = subinstr(_varname, "_se_", "",.) 
replace _varname = subinstr(_varname, "d", "",.) 
g group = substr(_varname,-1,.)  // keeps only the last character
replace _varname = subinstr(_varname, "_x_i4", "",.) 
gen date = quarterly(_varname, "qtr2") // turns the string variable into a year-quarter variable
format date %tq
reshape wide v1, i(date) j(beta_se, string)
rename v1b b_
rename v1se se_
gen ci_low_ = b_ - (`c'/1000)*se_  // 1960  
gen ci_high_ = b_ + (`c'/1000)*se_
drop   se_ 
* Graph coefficient and the confidence interval for each treatment group on each date
#delimit ;
twoway  (scatter b_ date , mcolor(black) msymbol(O) lcolor(black) lpattern(solid) )
		(rspike  ci_low_ ci_high_ date , color(gray) ) ,
		legend(order(1 2 )) legend(order(1 "Top tercile flood depth" 2 "95% CI" ) col(2) pos(7) ring(0))
		title(`depvar' A`r'_CI`c')
		ytitle("Treatment Effect" ) xtitle("")
		yline(0, lcolor(black)) xline(229.5, lcolor(black) lpattern(dash)) 
		graphregion(margin(medium) color(white))  plotregion(fcolor(white)) graphregion(fcolor(white))	 ;
#delimit cr ;
	graph save "$outty/Event_`depvar'_`Tk'_A`r'_CI`c'.gph", replace	
restore	
} // close dependent1

* FIGURE A9.A
* BY income & AGE
foreach v in 1 { 
foreach depvar of varlist  $studentvarsS2 {
preserve
statsby _b _se , clear: quietly reghdfe `depvar' ///					 
 			d2015q4_x_i23 	 			 	d2015q4_x_i4	 ///						 
 			d2016q1_x_i23 	 			 	d2016q1_x_i4	///
 			d2016q2_x_i23 	 			 	d2016q2_x_i4	///
 			d2016q3_x_i23 	 			 	d2016q3_x_i4	///
 			d2016q4_x_i23 	 			 	d2016q4_x_i4	 ///			 
 			d2017q1_x_i23 	 			 	d2017q1_x_i4			/*	d2017q2_x_i23 	 			d2017q2_x_i3 	d2017q2_x_i4	 control */   ///
 			d2017q3_x_i23 	 			 	d2017q3_x_i4	///
 			d2017q4_x_i23 	 			 	d2017q4_x_i4	 ///			 
 			d2018q1_x_i23 	 			 	d2018q1_x_i4	///
 			d2018q2_x_i23 	 			 	d2018q2_x_i4	///
 			d2018q3_x_i23 	 			 	d2018q3_x_i4	///
 			d2018q4_x_i23 	 			 	d2018q4_x_i4	 ///			 
 			d2019q1_x_i23 	 			 	d2019q1_x_i4	///
 			d2019q2_x_i23 	 			 	d2019q2_x_i4	///
 			d2019q3_x_i23 	 			 	d2019q3_x_i4	///
 			d2019q4_x_i23 	 			 	d2019q4_x_i4	 ///		
			d2020q1_x_i23 	 			 	d2020q1_x_i4	///
 			d2020q2_x_i23 	 			 	d2020q2_x_i4	///
 			d2020q3_x_i23 	 			 	d2020q3_x_i4	///
 			d2020q4_x_i23 	 			 	d2020q4_x_i4	 ///	
			d2021q1_x_i23 	 			 	d2021q1_x_i4	///
 			d2021q2_x_i23 	 			 	d2021q2_x_i4	///			 	 		 	 
			$AGECUBICit $BCHAR_x_POST   if  constrained_2017q2 == `v' & age_cut == `r' , absorb( $FE0     )	cluster( $clust ) 
keep  *x_i4* // keep only the coeffs of interest
 g _se_d2017q2_x_i4 = .
 g _b_d2017q2_x_i4 = 0
xpose, clear  varname  // this transposes it long
g beta_se = substr(_varname,1,3)  // keeps only the first 3 characters of the string
replace beta_se = subinstr(beta_se, "_", "",.) // removes the underbar
replace _varname = subinstr(_varname, "_b_", "",.) 
replace _varname = subinstr(_varname, "_se_", "",.) 
replace _varname = subinstr(_varname, "d", "",.) 
g group = substr(_varname,-1,.)  // keeps only the last character
replace _varname = subinstr(_varname, "_x_i4", "",.) 
gen date = quarterly(_varname, "qtr2") // turns the string variable into a year-quarter variable
format date %tq
reshape wide v1, i(date) j(beta_se, string)
rename v1b b_
rename v1se se_
gen ci_low_ = b_ - (`c'/1000)*se_  // 1960 
gen ci_high_ = b_ + (`c'/1000)*se_
drop   se_ 
* Graph coefficient and the confidence interval for each treatment group on each date
#delimit ;
twoway  (scatter b_ date , mcolor(black) msymbol(O) lcolor(black) lpattern(solid) )
		(rspike  ci_low_ ci_high_ date , color(gray) ) ,
		legend(order(1 2 )) legend(order(1 "Top tercile flood depth" 2 "95% CI" ) col(2) pos(7) ring(0))
		title(`depvar' H`v'_A`r'_CI`c')
		ytitle("Treatment Effect" ) xtitle("")
		yline(0, lcolor(black)) xline(229.5, lcolor(black) lpattern(dash)) 
		graphregion(margin(medium) color(white))  plotregion(fcolor(white)) graphregion(fcolor(white))	 ;
#delimit cr ;
	graph save "$outty/Event_`depvar'_`Tk'_H`v'_A`r'_CI`c'.gph", replace	
restore	
} // close dependent1
} // close income

* BY OWNOCC & AGE
foreach v in 1 { 
foreach depvar of varlist  $studentvarsS2 {
preserve
statsby _b _se , clear: quietly reghdfe `depvar' ///					 
 			d2015q4_x_i23 	 			 	d2015q4_x_i4	 ///						 
 			d2016q1_x_i23 	 			 	d2016q1_x_i4	///
 			d2016q2_x_i23 	 			 	d2016q2_x_i4	///
 			d2016q3_x_i23 	 			 	d2016q3_x_i4	///
 			d2016q4_x_i23 	 			 	d2016q4_x_i4	 ///			 
 			d2017q1_x_i23 	 			 	d2017q1_x_i4			/*	d2017q2_x_i23 	 			d2017q2_x_i3 	d2017q2_x_i4	 control */   ///
 			d2017q3_x_i23 	 			 	d2017q3_x_i4	///
 			d2017q4_x_i23 	 			 	d2017q4_x_i4	 ///			 
 			d2018q1_x_i23 	 			 	d2018q1_x_i4	///
 			d2018q2_x_i23 	 			 	d2018q2_x_i4	///
 			d2018q3_x_i23 	 			 	d2018q3_x_i4	///
 			d2018q4_x_i23 	 			 	d2018q4_x_i4	 ///			 
 			d2019q1_x_i23 	 			 	d2019q1_x_i4	///
 			d2019q2_x_i23 	 			 	d2019q2_x_i4	///
 			d2019q3_x_i23 	 			 	d2019q3_x_i4	///
 			d2019q4_x_i23 	 			 	d2019q4_x_i4	 ///		
			d2020q1_x_i23 	 			 	d2020q1_x_i4	///
 			d2020q2_x_i23 	 			 	d2020q2_x_i4	///
 			d2020q3_x_i23 	 			 	d2020q3_x_i4	///
 			d2020q4_x_i23 	 			 	d2020q4_x_i4	 ///	
			d2021q1_x_i23 	 			 	d2021q1_x_i4	///
 			d2021q2_x_i23 	 			 	d2021q2_x_i4	///			 	 		 	 
			$AGECUBICit $BCHAR_x_POST   if  high_owneroccupied_2017q2 == `v' & age_cut == `r' , absorb( $FE0     )	cluster( $clust ) 
keep  *x_i4* // keep only the coeffs of interest
 g _se_d2017q2_x_i4 = .
 g _b_d2017q2_x_i4 = 0
xpose, clear  varname  // this transposes it long
g beta_se = substr(_varname,1,3)  // keeps only the first 3 characters of the string
replace beta_se = subinstr(beta_se, "_", "",.) // removes the underbar
replace _varname = subinstr(_varname, "_b_", "",.) 
replace _varname = subinstr(_varname, "_se_", "",.) 
replace _varname = subinstr(_varname, "d", "",.) 
g group = substr(_varname,-1,.)  // keeps only the last character
replace _varname = subinstr(_varname, "_x_i4", "",.) 
gen date = quarterly(_varname, "qtr2") // turns the string variable into a year-quarter variable
format date %tq
reshape wide v1, i(date) j(beta_se, string)
rename v1b b_
rename v1se se_
gen ci_low_ = b_ - (`c'/1000)*se_  // 1960 
gen ci_high_ = b_ + (`c'/1000)*se_
drop   se_ 
* Graph coefficient and the confidence interval for each treatment group on each date
#delimit ;
twoway  (scatter b_ date , mcolor(black) msymbol(O) lcolor(black) lpattern(solid) )
		(rspike  ci_low_ ci_high_ date , color(gray) ) ,
		legend(order(1 2 )) legend(order(1 "Top tercile flood depth" 2 "95% CI" ) col(2) pos(7) ring(0))
		title(`depvar' O`v'_A`r'_CI`c')
		ytitle("Treatment Effect" ) xtitle("")
		yline(0, lcolor(black)) xline(229.5, lcolor(black) lpattern(dash)) 
		graphregion(margin(medium) color(white))  plotregion(fcolor(white)) graphregion(fcolor(white))	 ;
#delimit cr ;
	graph save "$outty/Event_`depvar'_`Tk'_O`v'_A`r'_CI`c'.gph", replace	
restore	
} // close dependent1
} // close ownocc

* BY college & AGE
foreach v in 0 { 
foreach depvar of varlist  $studentvarsS2 {
preserve
statsby _b _se , clear: quietly reghdfe `depvar' ///					 
 			d2015q4_x_i23 	 			 	d2015q4_x_i4	 ///						 
 			d2016q1_x_i23 	 			 	d2016q1_x_i4	///
 			d2016q2_x_i23 	 			 	d2016q2_x_i4	///
 			d2016q3_x_i23 	 			 	d2016q3_x_i4	///
 			d2016q4_x_i23 	 			 	d2016q4_x_i4	 ///			 
 			d2017q1_x_i23 	 			 	d2017q1_x_i4			/*	d2017q2_x_i23 	 			d2017q2_x_i3 	d2017q2_x_i4	 control */   ///
 			d2017q3_x_i23 	 			 	d2017q3_x_i4	///
 			d2017q4_x_i23 	 			 	d2017q4_x_i4	 ///			 
 			d2018q1_x_i23 	 			 	d2018q1_x_i4	///
 			d2018q2_x_i23 	 			 	d2018q2_x_i4	///
 			d2018q3_x_i23 	 			 	d2018q3_x_i4	///
 			d2018q4_x_i23 	 			 	d2018q4_x_i4	 ///			 
 			d2019q1_x_i23 	 			 	d2019q1_x_i4	///
 			d2019q2_x_i23 	 			 	d2019q2_x_i4	///
 			d2019q3_x_i23 	 			 	d2019q3_x_i4	///
 			d2019q4_x_i23 	 			 	d2019q4_x_i4	 ///		
			d2020q1_x_i23 	 			 	d2020q1_x_i4	///
 			d2020q2_x_i23 	 			 	d2020q2_x_i4	///
 			d2020q3_x_i23 	 			 	d2020q3_x_i4	///
 			d2020q4_x_i23 	 			 	d2020q4_x_i4	 ///	
			d2021q1_x_i23 	 			 	d2021q1_x_i4	///
 			d2021q2_x_i23 	 			 	d2021q2_x_i4	///		 	 		 	 
			$AGECUBICit $BCHAR_x_POST   if  high_share_college_degree_2017q2 == `v' & age_cut == `r' , absorb( $FE0     )	cluster( $clust ) 
keep  *x_i4* // keep only the coeffs of interest
 g _se_d2017q2_x_i4 = .
 g _b_d2017q2_x_i4 = 0
xpose, clear  varname  // this transposes it long
g beta_se = substr(_varname,1,3)  // keeps only the first 3 characters of the string
replace beta_se = subinstr(beta_se, "_", "",.) // removes the underbar
replace _varname = subinstr(_varname, "_b_", "",.) 
replace _varname = subinstr(_varname, "_se_", "",.) 
replace _varname = subinstr(_varname, "d", "",.) 
g group = substr(_varname,-1,.)  // keeps only the last character
replace _varname = subinstr(_varname, "_x_i4", "",.) 
gen date = quarterly(_varname, "qtr2") // turns the string variable into a year-quarter variable
format date %tq
reshape wide v1, i(date) j(beta_se, string)
rename v1b b_
rename v1se se_
gen ci_low_ = b_ - (`c'/1000)*se_  // 1960  
gen ci_high_ = b_ + (`c'/1000)*se_
drop   se_ 
* Graph coefficient and the confidence interval for each treatment group on each date
#delimit ;
twoway  (scatter b_ date , mcolor(black) msymbol(O) lcolor(black) lpattern(solid) )
		(rspike  ci_low_ ci_high_ date , color(gray) ) ,
		legend(order(1 2 )) legend(order(1 "Top tercile flood depth" 2 "95% CI" ) col(2) pos(7) ring(0))
		title(`depvar' C`v'_A`r'_CI`c')
		ytitle("Treatment Effect" ) xtitle("")
		yline(0, lcolor(black)) xline(229.5, lcolor(black) lpattern(dash)) 
		graphregion(margin(medium) color(white))  plotregion(fcolor(white)) graphregion(fcolor(white))	 ;
#delimit cr ;
	graph save "$outty/Event_`depvar'_`Tk'_C`v'_A`r'_CI`c'.gph", replace	
restore	
} // close dependent1
} // close college

* college and high owner & by AGE
foreach v in 0 { 
foreach depvar of varlist  $studentvarsS2 {
preserve
statsby _b _se , clear: quietly reghdfe `depvar' ///					 
 			d2015q4_x_i23 	 			 	d2015q4_x_i4	 ///						 
 			d2016q1_x_i23 	 			 	d2016q1_x_i4	///
 			d2016q2_x_i23 	 			 	d2016q2_x_i4	///
 			d2016q3_x_i23 	 			 	d2016q3_x_i4	///
 			d2016q4_x_i23 	 			 	d2016q4_x_i4	 ///			 
 			d2017q1_x_i23 	 			 	d2017q1_x_i4			/*	d2017q2_x_i23 	 			d2017q2_x_i3 	d2017q2_x_i4	 control */   ///
 			d2017q3_x_i23 	 			 	d2017q3_x_i4	///
 			d2017q4_x_i23 	 			 	d2017q4_x_i4	 ///			 
 			d2018q1_x_i23 	 			 	d2018q1_x_i4	///
 			d2018q2_x_i23 	 			 	d2018q2_x_i4	///
 			d2018q3_x_i23 	 			 	d2018q3_x_i4	///
 			d2018q4_x_i23 	 			 	d2018q4_x_i4	 ///			 
 			d2019q1_x_i23 	 			 	d2019q1_x_i4	///
 			d2019q2_x_i23 	 			 	d2019q2_x_i4	///
 			d2019q3_x_i23 	 			 	d2019q3_x_i4	///
 			d2019q4_x_i23 	 			 	d2019q4_x_i4	 ///		
			d2020q1_x_i23 	 			 	d2020q1_x_i4	///
 			d2020q2_x_i23 	 			 	d2020q2_x_i4	///
 			d2020q3_x_i23 	 			 	d2020q3_x_i4	///
 			d2020q4_x_i23 	 			 	d2020q4_x_i4	 ///	
			d2021q1_x_i23 	 			 	d2021q1_x_i4	///
 			d2021q2_x_i23 	 			 	d2021q2_x_i4	///		 	 		 	 
			$AGECUBICit $BCHAR_x_POST   if  high_share_college_degree_2017q2 == `v' & high_owneroccupied_2017q2==1  & age_cut == `r' , absorb( $FE0     )	cluster( $clust ) 
keep  *x_i4* // keep only the coeffs of interest
 g _se_d2017q2_x_i4 = .
 g _b_d2017q2_x_i4 = 0
xpose, clear  varname  // this transposes it long
g beta_se = substr(_varname,1,3)  // keeps only the first 3 characters of the string
replace beta_se = subinstr(beta_se, "_", "",.) // removes the underbar
replace _varname = subinstr(_varname, "_b_", "",.) 
replace _varname = subinstr(_varname, "_se_", "",.) 
replace _varname = subinstr(_varname, "d", "",.) 
g group = substr(_varname,-1,.)  // keeps only the last character
replace _varname = subinstr(_varname, "_x_i4", "",.) 
gen date = quarterly(_varname, "qtr2") // turns the string variable into a year-quarter variable
format date %tq
reshape wide v1, i(date) j(beta_se, string)
rename v1b b_
rename v1se se_
gen ci_low_ = b_ - (`c'/1000)*se_  // 1960 
gen ci_high_ = b_ + (`c'/1000)*se_
drop   se_ 
* Graph coefficient and the confidence interval for each treatment group on each date
#delimit ;
twoway  (scatter b_ date , mcolor(black) msymbol(O) lcolor(black) lpattern(solid) )
		(rspike  ci_low_ ci_high_ date , color(gray) ) ,
		legend(order(1 2 )) legend(order(1 "Top tercile flood depth" 2 "95% CI" ) col(2) pos(7) ring(0))
		title(`depvar' C`v'_O1_A`r'_CI`c')
		ytitle("Treatment Effect" ) xtitle("")
		yline(0, lcolor(black)) xline(229.5, lcolor(black) lpattern(dash)) 
		graphregion(margin(medium) color(white))  plotregion(fcolor(white)) graphregion(fcolor(white))	 ;
#delimit cr ;
	graph save "$outty/Event_`depvar'_`Tk'_C`v'_O1_A`r'_CI`c'.gph", replace	
restore	
} // close dependent1
} // close college

} // close age

} // close CI
} // close treatment

*  Combine and format for figures
#d
 			graph combine  	// Figure 2.A
							"$outty/Event_ibalance_w_flood_depth_A1_CI1960" /* 1 a 18-25 */
							"$outty/Event_ibalance_w_flood_depth_A2_CI1960" /* 2 b 26-45 */
							"$outty/Event_ibalance_w_flood_depth_A3_CI1960" /* 3 c >45 */
							"$outty/Event_ibalance_w_flood_depth_A4_CI1960" /* 2 b 26-45 */
							"$outty/Event_ibalance_w_flood_depth_A5_CI1960" /* 3 c >45 */
							// Figure A9.A
							"$outty/Event_ibalance_w_flood_depth_A1_CI1960" /* 5 e 18-25 */
							"$outty/Event_ibalance_w_flood_depth_H1_A1_CI1960" /* 5 e 18-23 */
							"$outty/Event_ibalance_w_flood_depth_C0_A1_CI1960" /* 8 g 18-23 */
							"$outty/Event_ibalance_w_flood_depth_O1_A1_CI1960" /* 6 f 18-23 */
							"$outty/Event_ibalance_w_flood_depth_C0_O1_A1_CI1960" /* 6 f 18-23 */
			,  xcommon   rows(2) cols(5) fxsize(180)  fysize(300) altshrink  plotregion(fcolor(white)) graphregion(fcolor(white)) graphregion(margin(small) color(white))   imargin(6 6 1 1)
			title("",size(small))   play("$inny/STUD_EXT_RFS1.grec") 
;


/* +++ */
* INTENSIVE EVENT STUDIES FIGURE 2.B //////////////////////////////////////////////////////////////////////////////////////
* FIXED EFFECT OPTIONS	
global  FE0    		 	person_id_ qtr2  
global clust			tract_block_2017q2_ qtr2
*Y>0
global studentvarsS1 	balance    
* /////////////////////////////////////////////////////////////////////////////////////////////////////////////

* INTENSIVE EVENT STUDIES
foreach Tk of varlist w_flood_depth    {
use "$inny/ccp_gis_prepped_data_0STUD" , clear
set matsize 11000
* Build the treatment x date variables;
	* first loop over years 2010-2021
	forvalues z = 15/20 {
	* second, loop over treatment groups 1-4 
	forvalues q = 1/4 {
	gen d20`z'q1_x_i`q' = i`q'_`Tk' * d20`z'q1
	gen d20`z'q2_x_i`q'= i`q'_`Tk' * d20`z'q2
	gen d20`z'q3_x_i`q' = i`q'_`Tk' * d20`z'q3
	gen d20`z'q4_x_i`q'= i`q'_`Tk' * d20`z'q4
	}
	* sum T1-T2 (i2+i3)
	gen d20`z'q1_x_i23 = d20`z'q1_x_i2 + d20`z'q1_x_i3 // + d20`z'q1_x_i4
	gen d20`z'q2_x_i23 = d20`z'q2_x_i2 + d20`z'q2_x_i3 // + d20`z'q2_x_i4
	gen d20`z'q3_x_i23 = d20`z'q3_x_i2 + d20`z'q3_x_i3 // + d20`z'q3_x_i4
	gen d20`z'q4_x_i23 = d20`z'q4_x_i2 + d20`z'q4_x_i3 // + d20`z'q4_x_i4
	}
	* because 2021 data ends after Q2, must do these two quarters mannually:
	gen d2021q1_x_i1 = i1_`Tk' * d2021q1
	gen d2021q1_x_i2 = i2_`Tk' * d2021q1
	gen d2021q1_x_i3 = i3_`Tk' * d2021q1
	gen d2021q1_x_i4 = i4_`Tk' * d2021q1
	gen d2021q2_x_i1= i1_`Tk' * d2021q2
	gen d2021q2_x_i2= i2_`Tk' * d2021q2
	gen d2021q2_x_i3= i3_`Tk' * d2021q2
	gen d2021q2_x_i4= i4_`Tk' * d2021q2
	* sum T1-T2 (i2+i3)
	gen d2021q1_x_i23 = d2021q1_x_i2 + d2021q1_x_i3 // + d2021q1_x_i4
	gen d2021q2_x_i23 = d2021q2_x_i2 + d2021q2_x_i3 // + d2021q2_x_i4
drop if nqtr<23 // enforces balance

foreach c in  1960   { 

* AGE
foreach r in 1 2 3 4 5  { 

* FIGURE 2.B
foreach depvar of varlist  $studentvarsS1 {
preserve
statsby _b _se , clear: quietly reghdfe `depvar' ///					 
 			d2015q4_x_i23 	 			 	d2015q4_x_i4	 ///						 
 			d2016q1_x_i23 	 			 	d2016q1_x_i4	///
 			d2016q2_x_i23 	 			 	d2016q2_x_i4	///
 			d2016q3_x_i23 	 			 	d2016q3_x_i4	///
 			d2016q4_x_i23 	 			 	d2016q4_x_i4	 ///			 
 			d2017q1_x_i23 	 			 	d2017q1_x_i4			/*	d2017q2_x_i23 	 			d2017q2_x_i3 	d2017q2_x_i4	 control */   ///
 			d2017q3_x_i23 	 			 	d2017q3_x_i4	///
 			d2017q4_x_i23 	 			 	d2017q4_x_i4	 ///			 
 			d2018q1_x_i23 	 			 	d2018q1_x_i4	///
 			d2018q2_x_i23 	 			 	d2018q2_x_i4	///
 			d2018q3_x_i23 	 			 	d2018q3_x_i4	///
 			d2018q4_x_i23 	 			 	d2018q4_x_i4	 ///			 
 			d2019q1_x_i23 	 			 	d2019q1_x_i4	///
 			d2019q2_x_i23 	 			 	d2019q2_x_i4	///
 			d2019q3_x_i23 	 			 	d2019q3_x_i4	///
 			d2019q4_x_i23 	 			 	d2019q4_x_i4	 ///		
			d2020q1_x_i23 	 			 	d2020q1_x_i4	///
 			d2020q2_x_i23 	 			 	d2020q2_x_i4	///
 			d2020q3_x_i23 	 			 	d2020q3_x_i4	///
 			d2020q4_x_i23 	 			 	d2020q4_x_i4	 ///	
			d2021q1_x_i23 	 			 	d2021q1_x_i4	///
 			d2021q2_x_i23 	 			 	d2021q2_x_i4	///	 	 	 	 
			$AGECUBICit $BCHAR_x_POST   if ibalance_2017q2==1 & age_cut == `r' , absorb( $FE0     )	cluster( $clust ) 
keep  *x_i4* // keep only the coeffs of interest
 g _se_d2017q2_x_i4 = .
 g _b_d2017q2_x_i4 = 0
xpose, clear  varname  // this transposes it long
g beta_se = substr(_varname,1,3)  // keeps only the first 3 characters of the string
replace beta_se = subinstr(beta_se, "_", "",.) // removes the underbar
replace _varname = subinstr(_varname, "_b_", "",.) 
replace _varname = subinstr(_varname, "_se_", "",.) 
replace _varname = subinstr(_varname, "d", "",.) 
g group = substr(_varname,-1,.)  // keeps only the last character
replace _varname = subinstr(_varname, "_x_i4", "",.) 
gen date = quarterly(_varname, "qtr2") // turns the string variable into a year-quarter variable
format date %tq
reshape wide v1, i(date) j(beta_se, string)
rename v1b b_
rename v1se se_
gen ci_low_ = b_ - (`c'/1000)*se_  // 1960 
gen ci_high_ = b_ + (`c'/1000)*se_
drop   se_ 
* Graph coefficient and the confidence interval for each treatment group on each date
#delimit ;
twoway  (scatter b_ date , mcolor(black) msymbol(O) lcolor(black) lpattern(solid) )
		(rspike  ci_low_ ci_high_ date , color(gray) ) ,
		legend(order(1 2 )) legend(order(1 "Top tercile flood depth" 2 "95% CI" ) col(2) pos(7) ring(0))
		title(`depvar' S1_A`r'_CI`c')
		ytitle("Treatment Effect" ) xtitle("")
		yline(0, lcolor(black)) xline(229.5, lcolor(black) lpattern(dash)) 
		graphregion(margin(medium) color(white))  plotregion(fcolor(white)) graphregion(fcolor(white))	 ;
#delimit cr ;
	graph save "$outty/Event_`depvar'_`Tk'_S1_A`r'_CI`c'.gph", replace	
restore	
} // close dependent1

} // close age

} // close CI
} // close treatment
* FIGURE 2.B
#d
 			graph combine   
							"$outty/Event_balance_w_flood_depth_S1_A1_CI1960" 
							"$outty/Event_balance_w_flood_depth_S1_A2_CI1960" 
							"$outty/Event_balance_w_flood_depth_S1_A3_CI1960" 
							"$outty/Event_balance_w_flood_depth_S1_A4_CI1960" 
							"$outty/Event_balance_w_flood_depth_S1_A5_CI1960" 		
			, ycommon xcommon   rows(1) cols(5) fxsize(180)  fysize(150) altshrink  plotregion(fcolor(white)) graphregion(fcolor(white)) graphregion(margin(small) color(white))   imargin(6 6 1 1)
			title("",size(small))   play("$inny/STUD_INT_RFS1.grec") 
;



/* +++ TABLE 3 
* psuedo data will bug out here because it was built with mostly non-zero student loan balances (there are few zero balances). 
  In the real CCP, there are of course lots of people without student debt as of the dawn of the hurricane. */
/*----------------------------------------------------------------------------------*/
* HAZARD STUDENT DEBT EXTENSIVE MARGIN INDIVIDUAL LEVEL    ---------------------------------------------------------
use "$inny/ccp_gis_prepped_data_0STUD" , clear  
set matsize 11000
*Extensive Margin of Debt balances HAZARD ------------------------------------------------------------
foreach Tk of varlist  w_flood_depth     {
preserve
/*------------hazard model------------*/
tsset, clear
sort person_id_ qtr2 
drop if balance_2017q2>0 // identify student debt holders as of just before the hurricane (psuedo data will bug out here because it was only built with non-zero student loan balances)
by person_id_: gen lag_stud = ibalance[_n-1] 
drop if lag_stud==1  // drop obs in the first post period after opening a student loan balance
keep if qtr2 >= mdy(6,1,2017)
/*------------only for hazard model for individual level student debt------------*/
* generate treatment interactions with the post period
g POSTt_x_Tk2 = POSTt * i2_`Tk'
g POSTtyr1_x_Tk2 = POSTt_yr1 * i2_`Tk'
g POSTtyr23_x_Tk2 = POSTt_yr23 * i2_`Tk'
g POSTt_x_Tk3 = POSTt * i3_`Tk'
g POSTtyr1_x_Tk3 = POSTt_yr1 * i3_`Tk'
g POSTtyr23_x_Tk3 = POSTt_yr23 * i3_`Tk'
g POSTt_x_Tk4 = POSTt * i4_`Tk'
g POSTtyr1_x_Tk4 = POSTt_yr1 * i4_`Tk'
g POSTtyr23_x_Tk4 = POSTt_yr23 * i4_`Tk'
*continous measure of treatment 
g POSTt_x_Tk = POSTt * `Tk'  
g POSTtyr1_x_Tk = POSTt_yr1 * `Tk'  
g POSTtyr23_x_Tk = POSTt_yr23 * `Tk' 

* COSNTRAINT INTERACTIONS ----
g loInc = constrained_2017q2
g loColl = low_share_college_degree_2017q2

foreach x of varlist  loInc  loColl   {
* tripple interactions with  the continous
g POSTt_x_Tk_x_`x' = `x' * POSTt* `Tk'  
g POSTtyr1_x_Tk_x_`x' = `x' * POSTt_yr1* `Tk'  
g POSTtyr23_x_Tk_x_`x' = `x' * POSTt_yr23* `Tk'  
* tripple interactions with the top flooding group
g POSTt_x_Tk4_x_`x' = `x' * POSTt* i4_`Tk'  
g POSTtyr1_x_Tk4_x_`x' = `x' * POSTt_yr1* i4_`Tk'  
g POSTtyr23_x_Tk4_x_`x' = `x' * POSTt_yr23* i4_`Tk'  
* tripple interactions with the mid flooding group
g POSTt_x_Tk3_x_`x' = `x' * POSTt* i3_`Tk'  
g POSTtyr1_x_Tk3_x_`x' = `x' * POSTt_yr1* i3_`Tk'  
g POSTtyr23_x_Tk3_x_`x' = `x' * POSTt_yr23* i3_`Tk'  
* tripple interactions with the low flooding group
g POSTt_x_Tk2_x_`x' = `x' * POSTt* i2_`Tk'  
g POSTtyr1_x_Tk2_x_`x' = `x' * POSTt_yr1* i2_`Tk'  
g POSTtyr23_x_Tk2_x_`x' = `x' * POSTt_yr23* i2_`Tk'  
* controls needed when doing a tripple interaction
g POSTt_x_`x'  = `x' * POSTt
g POSTtyr1_x_`x'  = `x' * POSTt_yr1
g POSTtyr23_x_`x' = `x' * POSTt_yr23
* double interaction would be collinear with the block fixed effect, must interact with POST 
}
drop if nqtr < 23 // enforce balance
// To avoid downward bias on the average quarterly hazard estimate, we keep only the first 10 quarters after Harvey. 
eststo clear 
* BY AGE
foreach a in 1  {
eststo clear 
*(1)
eststo: quietly	reghdfe  ibalance  POSTt_x_Tk2 POSTt_x_Tk3 POSTt_x_Tk4 	$AGECUBICit $BCHAR_x_POST if    age__cut == `a'  & qtr2 < mdy(3,1,2020) , 	absorb( $FE0     ) cluster( $clust ) 
estadd local Samp  	 "A`a'&Yeq0" 
estadd local IBFE  	  "I"  
estadd local Blockdate  "Y" 
estadd ysumm
estadd scalar r = e(r2_a)
*(2)
eststo: quietly	 reghdfe  ibalance   POSTt_x_Tk2 POSTt_x_Tk3 POSTt_x_Tk4 POSTt_x_Tk4_x_loInc POSTt_x_loInc  $AGECUBICit $BCHAR_x_POST if age__cut == `a'  	 & qtr2 < mdy(3,1,2020) ,  	absorb( $FE0      ) 		 	cluster( $clust )  
estadd local Samp  	 "A`a'&Yeq0"
estadd local IBFE  	  "I"  
estadd local Blockdate  "Y" 
estadd ysumm
estadd scalar r = e(r2_a)
*(3)
eststo: quietly	 reghdfe  ibalance   POSTt_x_Tk2 POSTt_x_Tk3 POSTt_x_Tk4 POSTt_x_Tk4_x_loInc POSTt_x_loInc  $AGECUBICit $BCHAR_x_POST if age__cut == `a' & high_owneroccupied_2017q2==1  	 & qtr2 < mdy(3,1,2020) ,  	absorb( $FE0      ) 		 	cluster( $clust )  
estadd local Samp  	 "A`a'&O1&Yeq0"
estadd local IBFE  	  "I"  
estadd local Blockdate  "Y" 
estadd ysumm
estadd scalar r = e(r2_a)
*(4)
eststo: quietly	 reghdfe  ibalance   POSTt_x_Tk2 POSTt_x_Tk3 POSTt_x_Tk4 POSTt_x_Tk4_x_loColl POSTt_x_loColl  $AGECUBICit $BCHAR_x_POST if age__cut == `a'  	 & qtr2 < mdy(3,1,2020) ,  	absorb( $FE0      ) 		 	cluster( $clust )  
estadd local Samp  	 "A`a'&Yeq0"
estadd local IBFE  	  "I"  
estadd local Blockdate  "Y" 
estadd ysumm
estadd scalar r = e(r2_a)
*(5)
eststo: quietly	 reghdfe  ibalance   POSTt_x_Tk2 POSTt_x_Tk3 POSTt_x_Tk4 POSTt_x_Tk4_x_loColl POSTt_x_loColl  $AGECUBICit $BCHAR_x_POST if age__cut == `a'  & high_owneroccupied_2017q2==1 	 & qtr2 < mdy(3,1,2020) ,  	absorb( $FE0      ) 		 	cluster( $clust )  
estadd local Samp  	 "A`a'&O1&Yeq0"
estadd local IBFE  	  "I"  
estadd local Blockdate  "Y" 
estadd ysumm
estadd scalar r = e(r2_a)
*(6)
eststo: quietly	 reghdfe  ibalance   POSTt_x_Tk2 POSTt_x_Tk3 POSTt_x_Tk4 POSTt_x_Tk4_x_loColl POSTt_x_loColl  POSTt_x_Tk4_x_loInc POSTt_x_loInc  $AGECUBICit $BCHAR_x_POST if age__cut == `a'  & high_owneroccupied_2017q2==1 	 & qtr2 < mdy(3,1,2020) ,  	absorb( $FE0      ) 		 	cluster( $clust )  
estadd local Samp  	 "A`a'&O1&Yeq0"
estadd local IBFE  	  "I"  
estadd local Blockdate  "Y" 
estadd ysumm
estadd scalar r = e(r2_a)
* Table 3
		esttab using "$outty/STUD_Table3_DiD_ext_HAZARDBAL_A`a'_balance _`Tk'_20.csv", replace cells(b(star fmt(2)) t(par fmt(2)))   ///
		stats(N r ymean Samp IBFE Blockdate, label(N "AdjR2" "Y-mean" "Sample" "Indiv or Block F.E." "Block-Char. x Post") fmt(0 2 2 0 0 0))    ///
		drop( $AGECUBICit $BCHAR_x_POST  _cons ) order(POSTt_x_Tk2 POSTt_x_Tk3 POSTt_x_Tk4 POSTt_x_Tk4_x_loColl  POSTt_x_Tk4_x_loInc   POSTt_x_loColl   POSTt_x_loInc   )  ///
		star(* 0.10 ** 0.05 *** 0.01) compress 
} // close age
} // close treat

/*--------------------------------------------------------------------------------------*/





/* +++ */
/*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
NONPARAMETRICS
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
* Figure A10
use "$inny/ccp_gis_prepped_data_0STUD" , clear
global studentvars ibalance balance 
* NONPARAMETRIC GRAPHS
foreach Tk of varlist  w_flood_depth    {
drop if missing(`Tk')
capture drop ngroup
gen ngroup=1 & !missing(ngroup_`Tk')
replace ngroup=23 if inlist(ngroup_`Tk',2,3)
replace ngroup=4 if inlist(ngroup_`Tk',4)
drop if nqtr<23 // balanced

* BY PRIOR STUDENT DEBT 
foreach s in  0  1  {
 /* psuedo data will bug out here because it was only built with only non-zero student loan balances (there are no zero balances). 
  In the real CCP, there are of course lots of people without student debt as of the dawn of the hurricane. 
  To run this part, delete the 0 above so that it runs only on people with non-zerod student loan balances: ibalance_2017q2==1  */
/*-------------------------------------------------------------------------------------------------------------*/
* BY AGE SPLIT
foreach a in 1 3  {
foreach outcome of varlist $studentvars  {
preserve
collapse (mean) `outcome' if !missing(ngroup) & age_cut==`a'  & ibalance_2017q2==`s' , by(yq ngroup)  
sort yq
#d 
graph twoway (connected `outcome' yq if ngroup == 1 , lpattern(solid) lcolor(black) mcolor(black) msymbol(O)   xline(229, lcolor(black)) ) 
			 (connected `outcome' yq if ngroup == 23 , lpattern(dash) lcolor(red) mcolor(red) msymbol(Th)  ) 
			 (connected `outcome' yq if ngroup == 4 , lpattern(solid) lwidth(thick) lcolor(blue) mcolor(blue) msymbol(S)  ) ,
	  legend(order(1 "No flood" 2 "T1-T2 flood" 3 "T3 flood") col(1) pos(10) ring(0)) /* legend(off) */ scheme(s2mono)
	  graphregion(margin(small) color(white))   plotregion(fcolor(white)) graphregion(fcolor(white))
	  ytitle("Mean(`outcome')",size(medium)) xtitle("") title("S`s'_A`a'")
;
#d cr
graph save "$outty/nonpar_`outcome'_`Tk'_S`s'_A`a'.gph", replace
restore
} // close dependent

* BY INCOME 
foreach c in 1 {
foreach outcome of varlist $studentvars  {
preserve
collapse (mean) `outcome' if !missing(ngroup) & age_cut==`a'  & constrained_2017q2==`c'  & ibalance_2017q2==`s' , by(yq ngroup)  
sort yq
#d 
graph twoway (connected `outcome' yq if ngroup == 1 , lpattern(solid) lcolor(black) mcolor(black) msymbol(O)   xline(229, lcolor(black)) ) 
			 (connected `outcome' yq if ngroup == 23 , lpattern(dash) lcolor(red) mcolor(red) msymbol(Th)  ) 
			 (connected `outcome' yq if ngroup == 4 , lpattern(solid) lwidth(thick) lcolor(blue) mcolor(blue) msymbol(S)  ) ,
	  legend(order(1 "No flood" 2 "T1-T2 flood" 3 "T3 flood") col(1) pos(10) ring(0)) /* legend(off) */ scheme(s2mono)
	  graphregion(margin(small) color(white))   plotregion(fcolor(white)) graphregion(fcolor(white))
	  ytitle("Mean(`outcome')",size(medium)) xtitle("") title("S`s'_A`a'_H`c'")
;
#d cr
graph save "$outty/nonpar_`outcome'_`Tk'_S`s'_A`a'_H`c'.gph", replace
restore
} // close dependent
} // close income

* BY COLLEGE 
foreach c in 0  {
foreach outcome of varlist $studentvars  {
preserve
collapse (mean) `outcome' if !missing(ngroup) & age_cut==`a'  & high_share_college_degree_2017q2==`c'  & ibalance_2017q2==`s' , by(yq ngroup)  
sort yq
#d 
graph twoway (connected `outcome' yq if ngroup == 1 , lpattern(solid) lcolor(black) mcolor(black) msymbol(O)   xline(229, lcolor(black)) ) 
			 (connected `outcome' yq if ngroup == 23 , lpattern(dash) lcolor(red) mcolor(red) msymbol(Th)  ) 
			 (connected `outcome' yq if ngroup == 4 , lpattern(solid) lwidth(thick) lcolor(blue) mcolor(blue) msymbol(S)  ) ,
	  legend(order(1 "No flood" 2 "T1-T2 flood" 3 "T3 flood") col(1) pos(10) ring(0)) /* legend(off) */ scheme(s2mono)
	  graphregion(margin(small) color(white))   plotregion(fcolor(white)) graphregion(fcolor(white))
	  ytitle("Mean(`outcome')",size(medium)) xtitle("") title("S`s'_A`a'_C`c'")
;
#d cr
graph save "$outty/nonpar_`outcome'_`Tk'_S`s'_A`a'_C`c'.gph", replace
restore
} // close dependent
} // close college

* BY OWNEROCC 
foreach o in 1 {
foreach outcome of varlist $studentvars  {
preserve
collapse (mean) `outcome' if !missing(ngroup) & age_cut==`a'  & high_owneroccupied_2017q2==`o'  & ibalance_2017q2==`s' , by(yq ngroup)  
sort yq
#d 
graph twoway (connected `outcome' yq if ngroup == 1 , lpattern(solid) lcolor(black) mcolor(black) msymbol(O)   xline(229, lcolor(black)) ) 
			 (connected `outcome' yq if ngroup == 23 , lpattern(dash) lcolor(red) mcolor(red) msymbol(Th)  ) 
			 (connected `outcome' yq if ngroup == 4 , lpattern(solid) lwidth(thick) lcolor(blue) mcolor(blue) msymbol(S)  ) ,
	  legend(order(1 "No flood" 2 "T1-T2 flood" 3 "T3 flood") col(1) pos(10) ring(0)) /* legend(off) */ scheme(s2mono)
	  graphregion(margin(small) color(white))   plotregion(fcolor(white)) graphregion(fcolor(white))
	  ytitle("Mean(`outcome')",size(medium)) xtitle("") title("S`s'_A`a'_O`o'")
;
#d cr
graph save "$outty/nonpar_`outcome'_`Tk'_S`s'_A`a'_O`o'.gph", replace
restore
} // close dependent
} // close own

* BY college & FP500
foreach c in 0  {
foreach z in 0  {
foreach outcome of varlist $studentvars  {
preserve
collapse (mean) `outcome' if !missing(ngroup) & age_cut==`a' & fp_in_50pct_devblock_2017q2==`z' & high_share_college_degree_2017q2==`c'  & ibalance_2017q2==`s' , by(yq ngroup)  
sort yq
#d 
graph twoway (connected `outcome' yq if ngroup == 1 , lpattern(solid) lcolor(black) mcolor(black) msymbol(O)   xline(229, lcolor(black)) ) 
			 (connected `outcome' yq if ngroup == 23 , lpattern(dash) lcolor(red) mcolor(red) msymbol(Th)  ) 
			 (connected `outcome' yq if ngroup == 4 , lpattern(solid) lwidth(thick) lcolor(blue) mcolor(blue) msymbol(S)  ) ,
	  legend(order(1 "No flood" 2 "T1-T2 flood" 3 "T3 flood") col(1) pos(10) ring(0)) /* legend(off) */ scheme(s2mono)
	  graphregion(margin(small) color(white))   plotregion(fcolor(white)) graphregion(fcolor(white))
	  ytitle("Mean(`outcome')",size(medium)) xtitle("") title("S`s'_A`a'_C`c'_FP50`z'")
;
#d cr
graph save "$outty/nonpar_`outcome'_`Tk'_S`s'_A`a'_C`c'_FP50`z'.gph", replace
restore
} // close dependent
} // close FP500
} // close college


* BY college & HIGH OWNEROCC
foreach c in 0 {
foreach o in 1 {
foreach outcome of varlist $studentvars  {
preserve
collapse (mean) `outcome' if !missing(ngroup) & age_cut==`a' & high_owneroccupied_2017q2==`o'  & high_share_college_degree_2017q2 ==`c'  & ibalance_2017q2==`s' , by(yq ngroup)  
sort yq
#d 
graph twoway (connected `outcome' yq if ngroup == 1 , lpattern(solid) lcolor(black) mcolor(black) msymbol(O)   xline(229, lcolor(black)) ) 
			 (connected `outcome' yq if ngroup == 23 , lpattern(dash) lcolor(red) mcolor(red) msymbol(Th)  ) 
			 (connected `outcome' yq if ngroup == 4 , lpattern(solid) lwidth(thick) lcolor(blue) mcolor(blue) msymbol(S)  ) ,
	  legend(order(1 "No flood" 2 "T1-T2 flood" 3 "T3 flood") col(1) pos(10) ring(0)) /* legend(off) */ scheme(s2mono)
	  graphregion(margin(small) color(white))   plotregion(fcolor(white)) graphregion(fcolor(white))
	  ytitle("Mean(`outcome')",size(medium)) xtitle("") title("S`s'_A`a'_C`c'_O`o'")
;
#d cr
graph save "$outty/nonpar_`outcome'_`Tk'_S`s'_A`a'_C`c'_O`o'.gph", replace
restore
} // close dependent
} // close own 
} // close college


} // close age
} // close s
} // close treat


/* +++ */
/*--------------------------------------------------------------------*/
* FIGURE A8 AND A9.B
use "$inny/ccp_gis_prepped_data_0STUD" , clear
global studentvars0 ibalance balance 
* NONPARAMETRIC GRAPHS
foreach Tk of varlist  w_flood_depth     {
drop if missing(`Tk')
capture drop ngroup
* define treatment, lump the low and mid terciles of flooding together into one group for visual ease
gen ngroup=1 & !missing(ngroup_`Tk')
replace ngroup=23 if inlist(ngroup_`Tk',2,3)
replace ngroup=4 if inlist(ngroup_`Tk',4)
drop if nqtr<23 // balanced
* BY AGE SPLIT
foreach a in  1 3 { // only the first and third age groups are shown in the corresponding figure

* all not by treatment
foreach outcome of varlist $studentvars0  {
preserve
collapse (mean) `outcome' if !missing(ngroup) & age_cut==`a' , by(yq)  
sort yq
#d 
graph twoway (connected `outcome' yq  , lpattern(solid) lcolor(black) lwidth(thick) mcolor(black) msymbol(O)   yline(0, lcolor(black))  xline(228, lcolor(black) lpattern(dash)) )  ,
	   scheme(s2mono)  graphregion(margin(small) color(white))   plotregion(fcolor(white)) graphregion(fcolor(white))
	  ytitle("Mean(`outcome')",size(medium)) xtitle("") title("A`a'")
;
#d cr
graph save "$outty/nonpar_`outcome'_A`a'.gph", replace
restore
} // close dependent

* BY INCOME 
foreach c in 1 {
foreach outcome of varlist $studentvars  {
preserve
collapse (mean) `outcome' if !missing(ngroup) & age_cut==`a'  & constrained_2017q2==`c'   , by(yq ngroup)  
sort yq
#d 
graph twoway (connected `outcome' yq if ngroup == 1 , lpattern(solid) lcolor(black) mcolor(black) msymbol(O)   xline(229, lcolor(black)) ) 
			 (connected `outcome' yq if ngroup == 23 , lpattern(dash) lcolor(red) mcolor(red) msymbol(Th)  ) 
			 (connected `outcome' yq if ngroup == 4 , lpattern(solid) lwidth(thick) lcolor(blue) mcolor(blue) msymbol(S)  ) ,
	  legend(order(1 "No flood" 2 "T1-T2 flood" 3 "T3 flood") col(1) pos(10) ring(0)) /* legend(off) */ scheme(s2mono)
	  graphregion(margin(small) color(white))   plotregion(fcolor(white)) graphregion(fcolor(white))
	  ytitle("Mean(`outcome')",size(medium)) xtitle("") title("A`a'_H`c'")
;
#d cr
graph save "$outty/nonpar_`outcome'_`Tk'_A`a'_H`c'.gph", replace
restore
} // close dependent
} // close income

* BY college 
foreach c in 0 {
foreach outcome of varlist $studentvars  {
preserve
collapse (mean) `outcome' if !missing(ngroup) & age_cut==`a'  & high_share_college_degree_2017q2 ==`c'   , by(yq ngroup)  
sort yq
#d 
graph twoway (connected `outcome' yq if ngroup == 1 , lpattern(solid) lcolor(black) mcolor(black) msymbol(O)   xline(229, lcolor(black)) ) 
			 (connected `outcome' yq if ngroup == 23 , lpattern(dash) lcolor(red) mcolor(red) msymbol(Th)  ) 
			 (connected `outcome' yq if ngroup == 4 , lpattern(solid) lwidth(thick) lcolor(blue) mcolor(blue) msymbol(S)  ) ,
	  legend(order(1 "No flood" 2 "T1-T2 flood" 3 "T3 flood") col(1) pos(10) ring(0)) /* legend(off) */ scheme(s2mono)
	  graphregion(margin(small) color(white))   plotregion(fcolor(white)) graphregion(fcolor(white))
	  ytitle("Mean(`outcome')",size(medium)) xtitle("") title("A`a'_C`c'")
;
#d cr
graph save "$outty/nonpar_`outcome'_`Tk'_A`a'_C`c'.gph", replace
restore
} // close dependent
} // close college


* BY OWNEROCC 
foreach o in 1 {
foreach outcome of varlist $studentvars  {
preserve
collapse (mean) `outcome' if !missing(ngroup) & age_cut==`a'  & high_owneroccupied_2017q2==`o'   , by(yq ngroup)  
sort yq
#d 
graph twoway (connected `outcome' yq if ngroup == 1 , lpattern(solid) lcolor(black) mcolor(black) msymbol(O)   xline(229, lcolor(black)) ) 
			 (connected `outcome' yq if ngroup == 23 , lpattern(dash) lcolor(red) mcolor(red) msymbol(Th)  ) 
			 (connected `outcome' yq if ngroup == 4 , lpattern(solid) lwidth(thick) lcolor(blue) mcolor(blue) msymbol(S)  ) ,
	  legend(order(1 "No flood" 2 "T1-T2 flood" 3 "T3 flood") col(1) pos(10) ring(0)) /* legend(off) */ scheme(s2mono)
	  graphregion(margin(small) color(white))   plotregion(fcolor(white)) graphregion(fcolor(white))
	  ytitle("Mean(`outcome')",size(medium)) xtitle("") title("A`a'_O`o'")
;
#d cr
graph save "$outty/nonpar_`outcome'_`Tk'_A`a'_O`o'.gph", replace
restore
} // close dependent
} // close own 


* BY college & FP500
foreach c in 0  {
foreach z in 0 {
foreach outcome of varlist $studentvars  {
preserve
collapse (mean) `outcome' if !missing(ngroup) & age_cut==`a' & fp_in_50pct_devblock_2017q2==`z' & high_share_college_degree_2017q2==`c'   , by(yq ngroup)  
sort yq
#d 
graph twoway (connected `outcome' yq if ngroup == 1 , lpattern(solid) lcolor(black) mcolor(black) msymbol(O)   xline(229, lcolor(black)) ) 
			 (connected `outcome' yq if ngroup == 23 , lpattern(dash) lcolor(red) mcolor(red) msymbol(Th)  ) 
			 (connected `outcome' yq if ngroup == 4 , lpattern(solid) lwidth(thick) lcolor(blue) mcolor(blue) msymbol(S)  ) ,
	  legend(order(1 "No flood" 2 "T1-T2 flood" 3 "T3 flood") col(1) pos(10) ring(0)) /* legend(off) */ scheme(s2mono)
	  graphregion(margin(small) color(white))   plotregion(fcolor(white)) graphregion(fcolor(white))
	  ytitle("Mean(`outcome')",size(medium)) xtitle("") title("A`a'_C`c'_FP50`z'")
;
#d cr
graph save "$outty/nonpar_`outcome'_`Tk'_A`a'_C`c'_FP50`z'.gph", replace
restore
} // close dependent
} // close FP500
} // close college


* BY INCOME & OWNEROCC
foreach c in 1 {
foreach o in 1 {
foreach outcome of varlist $studentvars  {
preserve
collapse (mean) `outcome' if !missing(ngroup) & age_cut==`a' & high_owneroccupied_2017q2==`o'  & constrained_2017q2==`c' , by(yq ngroup)  
sort yq
#d 
graph twoway (connected `outcome' yq if ngroup == 1 , lpattern(solid) lcolor(black) mcolor(black) msymbol(O)   xline(229, lcolor(black)) ) 
			 (connected `outcome' yq if ngroup == 23 , lpattern(dash) lcolor(red) mcolor(red) msymbol(Th)  ) 
			 (connected `outcome' yq if ngroup == 4 , lpattern(solid) lwidth(thick) lcolor(blue) mcolor(blue) msymbol(S)  ) ,
	  legend(order(1 "No flood" 2 "T1-T2 flood" 3 "T3 flood") col(1) pos(10) ring(0)) /* legend(off) */ scheme(s2mono)
	  graphregion(margin(small) color(white))   plotregion(fcolor(white)) graphregion(fcolor(white))
	  ytitle("Mean(`outcome')",size(medium)) xtitle("") title("A`a'_H`c'_O`o'")
;
#d cr
graph save "$outty/nonpar_`outcome'_`Tk'_A`a'_H`c'_O`o'.gph", replace
restore
} // close dependent
} // close own 
} // close income


* BY college & OWNEROCC
foreach c in 0 {
foreach o in 1 {
foreach outcome of varlist $studentvars  {
preserve
collapse (mean) `outcome' if !missing(ngroup) & age_cut==`a' & high_owneroccupied_2017q2==`o'  & high_share_college_degree_2017q2 ==`c' , by(yq ngroup)  
sort yq
#d 
graph twoway (connected `outcome' yq if ngroup == 1 , lpattern(solid) lcolor(black) mcolor(black) msymbol(O)   xline(229, lcolor(black)) ) 
			 (connected `outcome' yq if ngroup == 23 , lpattern(dash) lcolor(red) mcolor(red) msymbol(Th)  ) 
			 (connected `outcome' yq if ngroup == 4 , lpattern(solid) lwidth(thick) lcolor(blue) mcolor(blue) msymbol(S)  ) ,
	  legend(order(1 "No flood" 2 "T1-T2 flood" 3 "T3 flood") col(1) pos(10) ring(0)) /* legend(off) */ scheme(s2mono)
	  graphregion(margin(small) color(white))   plotregion(fcolor(white)) graphregion(fcolor(white))
	  ytitle("Mean(`outcome')",size(medium)) xtitle("") title("A`a'_C`c'_O`o'")
;
#d cr
graph save "$outty/nonpar_`outcome'_`Tk'_A`a'_C`c'_O`o'.gph", replace
restore
} // close dependent
} // close own 
} // close college

} // close AGE
} // close TREAT
/*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/


* Figure A8
#d
 			graph combine  
							"$outty/nonpar_balance_w_flood_depth_S1_A1" 
							"$outty/nonpar_balance_w_flood_depth_S1_A1_FP500" 
							"$outty/nonpar_balance_w_flood_depth_S1_A1_H1"   
							"$outty/nonpar_balance_w_flood_depth_S1_A1_C0"   
							"$outty/nonpar_balance_w_flood_depth_S1_A1_O1"   
							"$outty/nonpar_balance_w_flood_depth_S1_A1_C0_FP500"   
			, ycommon  xcommon rows(2) cols(3) fxsize(110)  fysize(90) altshrink  plotregion(fcolor(white)) graphregion(fcolor(white)) graphregion(margin(small) color(white)) imargin(5 5 2 2) // horizontal space between graphs
			title("",size(small))  play("$inny/STUD_IntNonPar_RFS1.grec") 
; 

* Figure A9.B
#d
 			graph combine  
							"$outty/nonpar_ibalance_w_flood_depth_A1" 
							"$outty/nonpar_ibalance_w_flood_depth_A1_H1"   
							"$outty/nonpar_ibalance_w_flood_depth_A1_C0"   
							"$outty/nonpar_ibalance_w_flood_depth_A1_O1"   
							"$outty/nonpar_ibalance_w_flood_depth_A1_C0_O1"   
			, ycommon  xcommon rows(1) cols(5) fxsize(180)  fysize(40) altshrink  plotregion(fcolor(white)) graphregion(fcolor(white)) graphregion(margin(small) color(white)) imargin(2 2 0 0) // horizontal space between graphs
			title("",size(small))  play("$inny/STUD_ExtNonPar_RFS1.grec") 
; 


* Figure A10
* A
#d
 			graph combine  
			"$outty/nonpar_ibalance_w_flood_depth_A3"   "$outty/nonpar_ibalance_w_flood_depth_A3_O1"  "$outty/nonpar_ibalance_w_flood_depth_A3_C0" 	"$outty/nonpar_ibalance_w_flood_depth_A1_C0"  	
			, ycommon  xcommon rows(1) cols(4) fxsize(150)  fysize(40) altshrink  plotregion(fcolor(white)) graphregion(fcolor(white)) graphregion(margin(small) color(white)) imargin(2 2 0 0) // horizontal space between graphs
			title("",size(small))   
; 
* B Conditional on non-zero balance
#d
 			graph combine  
			"$outty/nonpar_ibalance_w_flood_depth_S1_A3"   "$outty/nonpar_ibalance_w_flood_depth_S1_A3_O1"  "$outty/nonpar_ibalance_w_flood_depth_S1_A3_C0" 	"$outty/nonpar_ibalance_w_flood_depth_S1_A1_C0"  	
			, ycommon  xcommon rows(1) cols(4) fxsize(150)  fysize(40) altshrink  plotregion(fcolor(white)) graphregion(fcolor(white)) graphregion(margin(small) color(white)) imargin(2 2 0 0) // horizontal space between graphs
			title("",size(small))  
; 
* C Conditional on zero balance
#d
 			graph combine  
			"$outty/nonpar_ibalance_w_flood_depth_S0_A3"   "$outty/nonpar_ibalance_w_flood_depth_S0_A3_O1"  "$outty/nonpar_ibalance_w_flood_depth_S0_A3_C0" 	"$outty/nonpar_ibalance_w_flood_depth_S0_A1_C0"  	
			, ycommon  xcommon rows(1) cols(4) fxsize(150)  fysize(40) altshrink  plotregion(fcolor(white)) graphregion(fcolor(white)) graphregion(margin(small) color(white)) imargin(2 2 0 0) // horizontal space between graphs
			title("",size(small)) 
; 



